diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..aff151f94 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,43 @@ +# Contributor Code of Conduct + +Openness, transparency and our community-driven participatory approach guide us in our day-to-day interactions and decision-making. Our open source projects are no exception. Trust, respect, collaboration and transparency are core values we believe should live and breathe within our projects. Our community welcomes participants from around the world with different experiences, unique perspectives, and great ideas to share. + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 
+ +## Our Standards + +Examples of behavior that contributes to creating a positive environment include: + +- Using welcoming and inclusive language +- Being respectful of differing viewpoints and experiences +- Gracefully accepting constructive criticism +- Attempting collaboration before conflict +- Focusing on what is best for the community +- Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +- Violence, threats of violence, or inciting others to commit self-harm +- The use of sexualized language or imagery and unwelcome sexual attention or advances +- Trolling, intentionally spreading misinformation, insulting/derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or electronic address, without explicit permission +- Abuse of the reporting process to intentionally harass or exclude others +- Advocating for, or encouraging, any of the above behavior +- Other conduct which could reasonably be considered inappropriate in a professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. 
Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), [version 1.4](https://www.contributor-covenant.org/version/1/4/code-of-conduct.html). \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..13a359c86 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,10 @@ +# Contributing to D-Net Hadoop + +:+1::tada: First off, thanks for taking the time to contribute! :tada::+1: + +This project and everyone participating in it is governed by our [Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. Please report unacceptable behavior to [dnet-team@isti.cnr.it](mailto:dnet-team@isti.cnr.it). + +The following is a set of guidelines for contributing to this project and its packages. These are mostly guidelines, not rules, which apply to this project as a whole, including all its sub-modules. +Use your best judgment, and feel free to propose changes to this document in a pull request. + +All contributions are welcome, all contributions will be considered to be contributed under the [project license](LICENSE.md). diff --git a/LICENSE b/LICENSE.md similarity index 100% rename from LICENSE rename to LICENSE.md diff --git a/README.md b/README.md index 2c1440f44..b6575814d 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,11 @@ Dnet-hadoop is the project that defined all the [OOZIE workflows](https://oozie.apache.org/) for the OpenAIRE Graph construction, processing, provisioning. +This project adheres to the Contributor Covenant [code of conduct](CODE_OF_CONDUCT.md). 
+By participating, you are expected to uphold this code. Please report unacceptable behavior to [dnet-team@isti.cnr.it](mailto:dnet-team@isti.cnr.it). + +This project is licensed under the [AGPL v3 or later version](LICENSE.md). + How to build, package and run oozie workflows ==================== diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummary.java new file mode 100644 index 000000000..fff28dbdf --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummary.java @@ -0,0 +1,39 @@ + +package eu.dnetlib.dhp.common.api.context; + +public class CategorySummary { + + private String id; + + private String label; + + private boolean hasConcept; + + public String getId() { + return id; + } + + public String getLabel() { + return label; + } + + public boolean isHasConcept() { + return hasConcept; + } + + public CategorySummary setId(final String id) { + this.id = id; + return this; + } + + public CategorySummary setLabel(final String label) { + this.label = label; + return this; + } + + public CategorySummary setHasConcept(final boolean hasConcept) { + this.hasConcept = hasConcept; + return this; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummaryList.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummaryList.java new file mode 100644 index 000000000..7213a945a --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/CategorySummaryList.java @@ -0,0 +1,7 @@ + +package eu.dnetlib.dhp.common.api.context; + +import java.util.ArrayList; + +public class CategorySummaryList extends ArrayList { +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummary.java new file mode 100644 index 000000000..a576f9a1e --- /dev/null +++ 
b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummary.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.common.api.context; + +import java.util.List; + +public class ConceptSummary { + + private String id; + + private String label; + + public boolean hasSubConcept; + + private List concepts; + + public String getId() { + return id; + } + + public String getLabel() { + return label; + } + + public List getConcepts() { + return concepts; + } + + public ConceptSummary setId(final String id) { + this.id = id; + return this; + } + + public ConceptSummary setLabel(final String label) { + this.label = label; + return this; + } + + public boolean isHasSubConcept() { + return hasSubConcept; + } + + public ConceptSummary setHasSubConcept(final boolean hasSubConcept) { + this.hasSubConcept = hasSubConcept; + return this; + } + + public ConceptSummary setConcept(final List concepts) { + this.concepts = concepts; + return this; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummaryList.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummaryList.java new file mode 100644 index 000000000..45ccd2810 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ConceptSummaryList.java @@ -0,0 +1,7 @@ + +package eu.dnetlib.dhp.common.api.context; + +import java.util.ArrayList; + +public class ConceptSummaryList extends ArrayList { +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummary.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummary.java new file mode 100644 index 000000000..46a0d0d5a --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummary.java @@ -0,0 +1,50 @@ + +package eu.dnetlib.dhp.common.api.context; + +public class ContextSummary { + + private String id; + + private String label; + + private String type; + + private String status; + + public String getId() { + return id; 
+ } + + public String getLabel() { + return label; + } + + public String getType() { + return type; + } + + public String getStatus() { + return status; + } + + public ContextSummary setId(final String id) { + this.id = id; + return this; + } + + public ContextSummary setLabel(final String label) { + this.label = label; + return this; + } + + public ContextSummary setType(final String type) { + this.type = type; + return this; + } + + public ContextSummary setStatus(final String status) { + this.status = status; + return this; + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummaryList.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummaryList.java new file mode 100644 index 000000000..618600007 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/api/context/ContextSummaryList.java @@ -0,0 +1,7 @@ + +package eu.dnetlib.dhp.common.api.context; + +import java.util.ArrayList; + +public class ContextSummaryList extends ArrayList { +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java index 905457bcd..342d73cdc 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java @@ -8,10 +8,13 @@ import java.io.InputStream; import java.net.*; import java.util.List; import java.util.Map; +import java.util.concurrent.TimeUnit; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.math.NumberUtils; +import org.apache.commons.lang3.time.DateUtils; import org.apache.http.HttpHeaders; +import org.joda.time.Instant; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -94,14 +97,16 @@ public class HttpConnector2 { throw new CollectorException(msg); } - log.info("Request attempt {} [{}]", retryNumber, requestUrl); - InputStream input = null; + long start 
= System.currentTimeMillis(); try { if (getClientParams().getRequestDelay() > 0) { backoffAndSleep(getClientParams().getRequestDelay()); } + + log.info("Request attempt {} [{}]", retryNumber, requestUrl); + final HttpURLConnection urlConn = (HttpURLConnection) new URL(requestUrl).openConnection(); urlConn.setInstanceFollowRedirects(false); urlConn.setReadTimeout(getClientParams().getReadTimeOut() * 1000); @@ -115,9 +120,8 @@ public class HttpConnector2 { urlConn.addRequestProperty(headerEntry.getKey(), headerEntry.getValue()); } } - if (log.isDebugEnabled()) { - logHeaderFields(urlConn); - } + + logHeaderFields(urlConn); int retryAfter = obtainRetryAfter(urlConn.getHeaderFields()); String rateLimit = urlConn.getHeaderField(Constants.HTTPHEADER_IETF_DRAFT_RATELIMIT_LIMIT); @@ -132,9 +136,7 @@ public class HttpConnector2 { } if (is2xx(urlConn.getResponseCode())) { - input = urlConn.getInputStream(); - responseType = urlConn.getContentType(); - return input; + return getInputStream(urlConn, start); } if (is3xx(urlConn.getResponseCode())) { // REDIRECTS @@ -144,6 +146,7 @@ public class HttpConnector2 { .put( REPORT_PREFIX + urlConn.getResponseCode(), String.format("Moved to: %s", newUrl)); + logRequestTime(start); urlConn.disconnect(); if (retryAfter > 0) { backoffAndSleep(retryAfter); @@ -159,26 +162,50 @@ public class HttpConnector2 { if (retryAfter > 0) { log .warn( - "{} - waiting and repeating request after suggested retry-after {} sec.", - requestUrl, retryAfter); + "waiting and repeating request after suggested retry-after {} sec for URL {}", + retryAfter, requestUrl); backoffAndSleep(retryAfter * 1000); } else { log .warn( - "{} - waiting and repeating request after default delay of {} sec.", - requestUrl, getClientParams().getRetryDelay()); - backoffAndSleep(retryNumber * getClientParams().getRetryDelay() * 1000); + "waiting and repeating request after default delay of {} sec for URL {}", + getClientParams().getRetryDelay(), requestUrl); + 
backoffAndSleep(retryNumber * getClientParams().getRetryDelay()); } report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl); + + logRequestTime(start); + urlConn.disconnect(); + return attemptDownload(requestUrl, retryNumber + 1, report); + case 422: // UNPROCESSABLE ENTITY + report.put(REPORT_PREFIX + urlConn.getResponseCode(), requestUrl); + log.warn("waiting and repeating request after 10 sec for URL {}", requestUrl); + backoffAndSleep(10000); + try { + return getInputStream(urlConn, start); + } catch (IOException e) { + log + .error( + "server returned 422 and got IOException accessing the response body from URL {}", + requestUrl); + log.error("IOException:", e); + logRequestTime(start); + urlConn.disconnect(); + return attemptDownload(requestUrl, retryNumber + 1, report); + } default: + log.error("got error {} from URL: {}", urlConn.getResponseCode(), urlConn.getURL()); + log.error("response message: {}", urlConn.getResponseMessage()); report .put( REPORT_PREFIX + urlConn.getResponseCode(), String .format( "%s Error: %s", requestUrl, urlConn.getResponseMessage())); + logRequestTime(start); + urlConn.disconnect(); throw new CollectorException(urlConn.getResponseCode() + " error " + report); } } @@ -199,13 +226,27 @@ public class HttpConnector2 { } } + private InputStream getInputStream(HttpURLConnection urlConn, long start) throws IOException { + InputStream input = urlConn.getInputStream(); + responseType = urlConn.getContentType(); + logRequestTime(start); + return input; + } + + private static void logRequestTime(long start) { + log + .info( + "request time elapsed: {}sec", + TimeUnit.MILLISECONDS.toSeconds(System.currentTimeMillis() - start)); + } + private void logHeaderFields(final HttpURLConnection urlConn) throws IOException { - log.debug("StatusCode: {}", urlConn.getResponseMessage()); + log.info("Response: {} - {}", urlConn.getResponseCode(), urlConn.getResponseMessage()); for (Map.Entry> e : urlConn.getHeaderFields().entrySet()) { if 
(e.getKey() != null) { for (String v : e.getValue()) { - log.debug(" key: {} - value: {}", e.getKey(), v); + log.info(" key: {} - value: {}", e.getKey(), v); } } } @@ -225,7 +266,7 @@ public class HttpConnector2 { for (String key : headerMap.keySet()) { if ((key != null) && key.equalsIgnoreCase(HttpHeaders.RETRY_AFTER) && (!headerMap.get(key).isEmpty()) && NumberUtils.isCreatable(headerMap.get(key).get(0))) { - return Integer.parseInt(headerMap.get(key).get(0)) + 10; + return Integer.parseInt(headerMap.get(key).get(0)); } } return -1; diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java b/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java new file mode 100644 index 000000000..027bf0735 --- /dev/null +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/oozie/RunSQLSparkJob.java @@ -0,0 +1,77 @@ + +package eu.dnetlib.dhp.oozie; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; + +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +import org.apache.commons.lang3.time.DurationFormatUtils; +import org.apache.commons.text.StringSubstitutor; +import org.apache.spark.SparkConf; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.io.Resources; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; + +public class RunSQLSparkJob { + private static final Logger log = LoggerFactory.getLogger(RunSQLSparkJob.class); + + private final ArgumentApplicationParser parser; + + public RunSQLSparkJob(ArgumentApplicationParser parser) { + this.parser = parser; + } + + public static void main(String[] args) throws Exception { + + Map params = new HashMap<>(); + for (int i = 0; i < args.length - 1; i++) { + if (args[i].startsWith("--")) { + params.put(args[i].substring(2), args[++i]); + } + } + + /* + * String jsonConfiguration = IOUtils .toString( Objects .requireNonNull( 
RunSQLSparkJob.class + * .getResourceAsStream( "/eu/dnetlib/dhp/oozie/run_sql_parameters.json"))); final ArgumentApplicationParser + * parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); + */ + + Boolean isSparkSessionManaged = Optional + .ofNullable(params.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + URL url = com.google.common.io.Resources.getResource(params.get("sql")); + String raw_sql = Resources.toString(url, StandardCharsets.UTF_8); + + String sql = StringSubstitutor.replace(raw_sql, params); + log.info("sql: {}", sql); + + SparkConf conf = new SparkConf(); + conf.set("hive.metastore.uris", params.get("hiveMetastoreUris")); + + runWithSparkHiveSession( + conf, + isSparkSessionManaged, + spark -> { + for (String statement : sql.split(";\\s*/\\*\\s*EOS\\s*\\*/\\s*")) { + log.info("executing: {}", statement); + long startTime = System.currentTimeMillis(); + spark.sql(statement).show(); + log + .info( + "executed in {}", + DurationFormatUtils.formatDuration(System.currentTimeMillis() - startTime, "HH:mm:ss.S")); + } + }); + } + +} diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java index 0124e96fc..f01f90fe4 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/GraphCleaningFunctions.java @@ -312,7 +312,8 @@ public class GraphCleaningFunctions extends CleaningFunctions { } if (value instanceof Datasource) { - // nothing to evaluate here + final Datasource d = (Datasource) value; + return Objects.nonNull(d.getOfficialname()) && StringUtils.isNotBlank(d.getOfficialname().getValue()); } else if (value instanceof Project) { final Project p = (Project) value; return Objects.nonNull(p.getCode()) 
&& StringUtils.isNotBlank(p.getCode().getValue()); diff --git a/dhp-common/src/main/resources/eu/dnetlib/dhp/oozie/run_sql_parameters.json b/dhp-common/src/main/resources/eu/dnetlib/dhp/oozie/run_sql_parameters.json new file mode 100644 index 000000000..355f38e2f --- /dev/null +++ b/dhp-common/src/main/resources/eu/dnetlib/dhp/oozie/run_sql_parameters.json @@ -0,0 +1,20 @@ +[ + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "when true will stop SparkSession after job execution", + "paramRequired": false + }, + { + "paramName": "hmu", + "paramLongName": "hiveMetastoreUris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName": "sql", + "paramLongName": "sql", + "paramDescription": "sql script to execute", + "paramRequired": true + } +] \ No newline at end of file diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java index 3da8eb490..e971ec5bb 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/AbstractClusteringFunction.java @@ -14,9 +14,9 @@ import eu.dnetlib.pace.config.Config; public abstract class AbstractClusteringFunction extends AbstractPaceFunctions implements ClusteringFunction { - protected Map params; + protected Map params; - public AbstractClusteringFunction(final Map params) { + public AbstractClusteringFunction(final Map params) { this.params = params; } @@ -27,7 +27,7 @@ public abstract class AbstractClusteringFunction extends AbstractPaceFunctions i return fields .stream() .filter(f -> !f.isEmpty()) - .map(this::normalize) + .map(s -> normalize(s)) .map(s -> filterAllStopWords(s)) .map(s -> doApply(conf, s)) .map(c -> filterBlacklisted(c, ngramBlacklist)) @@ -36,11 +36,24 @@ public abstract class AbstractClusteringFunction 
extends AbstractPaceFunctions i .collect(Collectors.toCollection(HashSet::new)); } - public Map getParams() { + public Map getParams() { return params; } protected Integer param(String name) { - return params.get(name); + Object val = params.get(name); + if (val == null) + return null; + if (val instanceof Number) { + return ((Number) val).intValue(); + } + return Integer.parseInt(val.toString()); + } + + protected int paramOrDefault(String name, int i) { + Integer res = param(name); + if (res == null) + res = i; + return res; } } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java index 9072fbb4b..b5db27106 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Acronyms.java @@ -13,7 +13,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("acronyms") public class Acronyms extends AbstractClusteringFunction { - public Acronyms(Map params) { + public Acronyms(Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java index 8b7852418..269de867d 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ClusteringFunction.java @@ -11,6 +11,6 @@ public interface ClusteringFunction { public Collection apply(Config config, List fields); - public Map getParams(); + public Map getParams(); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java index bc8844aee..cbfcde266 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java +++ 
b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/ImmutableFieldValue.java @@ -12,7 +12,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("immutablefieldvalue") public class ImmutableFieldValue extends AbstractClusteringFunction { - public ImmutableFieldValue(final Map params) { + public ImmutableFieldValue(final Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/JSONListClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/JSONListClustering.java new file mode 100644 index 000000000..e00092bd0 --- /dev/null +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/JSONListClustering.java @@ -0,0 +1,69 @@ + +package eu.dnetlib.pace.clustering; + +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.apache.commons.lang3.StringUtils; + +import com.jayway.jsonpath.Configuration; +import com.jayway.jsonpath.DocumentContext; +import com.jayway.jsonpath.JsonPath; +import com.jayway.jsonpath.Option; + +import eu.dnetlib.pace.common.AbstractPaceFunctions; +import eu.dnetlib.pace.config.Config; +import eu.dnetlib.pace.util.MapDocumentUtil; + +@ClusteringClass("jsonlistclustering") +public class JSONListClustering extends AbstractPaceFunctions implements ClusteringFunction { + + private Map params; + + public JSONListClustering(Map params) { + this.params = params; + } + + @Override + public Map getParams() { + return params; + } + + @Override + public Collection apply(Config conf, List fields) { + return fields + .stream() + .filter(f -> !f.isEmpty()) + .map(s -> doApply(conf, s)) + .filter(StringUtils::isNotBlank) + .collect(Collectors.toCollection(HashSet::new)); + } + + private String doApply(Config conf, String json) { + StringBuilder st = new StringBuilder(); // to build the string used for comparisons basing on the jpath into + // parameters + final DocumentContext documentContext = 
JsonPath + .using(Configuration.defaultConfiguration().addOptions(Option.SUPPRESS_EXCEPTIONS)) + .parse(json); + + // for each path in the param list + for (String key : params.keySet().stream().filter(k -> k.contains("jpath")).collect(Collectors.toList())) { + String path = params.get(key).toString(); + String value = MapDocumentUtil.getJPathString(path, documentContext); + if (value == null || value.isEmpty()) + value = ""; + st.append(value); + st.append(" "); + } + + st.setLength(Math.max(0, st.length() - 1)); + + if (StringUtils.isBlank(st)) { + return "1"; + } + return st.toString(); + } +} diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java index 38299adb4..fdd8d1fb1 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java @@ -11,7 +11,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("keywordsclustering") public class KeywordsClustering extends AbstractClusteringFunction { - public KeywordsClustering(Map params) { + public KeywordsClustering(Map params) { super(params); } @@ -19,8 +19,8 @@ public class KeywordsClustering extends AbstractClusteringFunction { protected Collection doApply(final Config conf, String s) { // takes city codes and keywords codes without duplicates - Set keywords = getKeywords(s, conf.translationMap(), params.getOrDefault("windowSize", 4)); - Set cities = getCities(s, params.getOrDefault("windowSize", 4)); + Set keywords = getKeywords(s, conf.translationMap(), paramOrDefault("windowSize", 4)); + Set cities = getCities(s, paramOrDefault("windowSize", 4)); // list of combination to return as result final Collection combinations = new LinkedHashSet(); @@ -28,7 +28,7 @@ public class KeywordsClustering extends AbstractClusteringFunction { for (String keyword : keywordsToCodes(keywords, 
conf.translationMap())) { for (String city : citiesToCodes(cities)) { combinations.add(keyword + "-" + city); - if (combinations.size() >= params.getOrDefault("max", 2)) { + if (combinations.size() >= paramOrDefault("max", 2)) { return combinations; } } @@ -42,8 +42,8 @@ public class KeywordsClustering extends AbstractClusteringFunction { return fields .stream() .filter(f -> !f.isEmpty()) - .map(this::cleanup) - .map(this::normalize) + .map(KeywordsClustering::cleanup) + .map(KeywordsClustering::normalize) .map(s -> filterAllStopWords(s)) .map(s -> doApply(conf, s)) .map(c -> filterBlacklisted(c, ngramBlacklist)) diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LastNameFirstInitial.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LastNameFirstInitial.java index 5a385961a..9692f5762 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LastNameFirstInitial.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LastNameFirstInitial.java @@ -16,7 +16,7 @@ public class LastNameFirstInitial extends AbstractClusteringFunction { private boolean DEFAULT_AGGRESSIVE = true; - public LastNameFirstInitial(final Map params) { + public LastNameFirstInitial(final Map params) { super(params); } @@ -25,7 +25,7 @@ public class LastNameFirstInitial extends AbstractClusteringFunction { return fields .stream() .filter(f -> !f.isEmpty()) - .map(this::normalize) + .map(LastNameFirstInitial::normalize) .map(s -> doApply(conf, s)) .map(c -> filterBlacklisted(c, ngramBlacklist)) .flatMap(c -> c.stream()) @@ -33,8 +33,7 @@ public class LastNameFirstInitial extends AbstractClusteringFunction { .collect(Collectors.toCollection(HashSet::new)); } - @Override - protected String normalize(final String s) { + public static String normalize(final String s) { return fixAliases(transliterate(nfd(unicodeNormalization(s)))) // do not compact the regexes in a single expression, would cause StackOverflowError in case of large input // strings diff 
--git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java index a3a6c4881..807f41dd5 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LowercaseClustering.java @@ -15,7 +15,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("lowercase") public class LowercaseClustering extends AbstractClusteringFunction { - public LowercaseClustering(final Map params) { + public LowercaseClustering(final Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java index aa06aa408..bcc9667a8 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NgramPairs.java @@ -12,11 +12,11 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("ngrampairs") public class NgramPairs extends Ngrams { - public NgramPairs(Map params) { + public NgramPairs(Map params) { super(params, false); } - public NgramPairs(Map params, boolean sorted) { + public NgramPairs(Map params, boolean sorted) { super(params, sorted); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java index 96c305a16..7b862c729 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/Ngrams.java @@ -10,11 +10,11 @@ public class Ngrams extends AbstractClusteringFunction { private final boolean sorted; - public Ngrams(Map params) { + public Ngrams(Map params) { this(params, false); } - public Ngrams(Map params, boolean sorted) { + public Ngrams(Map params, boolean sorted) { super(params); this.sorted = sorted; } diff --git 
a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NumAuthorsTitleSuffixPrefixChain.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NumAuthorsTitleSuffixPrefixChain.java new file mode 100644 index 000000000..f1d1e17b9 --- /dev/null +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/NumAuthorsTitleSuffixPrefixChain.java @@ -0,0 +1,113 @@ + +package eu.dnetlib.pace.clustering; + +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + +import com.google.common.base.Splitter; +import com.google.common.collect.Sets; + +import eu.dnetlib.pace.config.Config; + +@ClusteringClass("numAuthorsTitleSuffixPrefixChain") +public class NumAuthorsTitleSuffixPrefixChain extends AbstractClusteringFunction { + + public NumAuthorsTitleSuffixPrefixChain(Map params) { + super(params); + } + + @Override + public Collection apply(Config conf, List fields) { + + try { + int num_authors = Math.min(Integer.parseInt(fields.get(0)), 21); // SIZE threshold is 20, +1 + + if (num_authors > 0) { + return super.apply(conf, fields.subList(1, fields.size())) + .stream() + .map(s -> num_authors + "-" + s) + .collect(Collectors.toList()); + } + } catch (NumberFormatException e) { + // missing or null authors array + } + + return Collections.emptyList(); + } + + @Override + protected Collection doApply(Config conf, String s) { + return suffixPrefixChain(cleanup(s), param("mod")); + } + + private Collection suffixPrefixChain(String s, int mod) { + // create the list of words from the string (remove short words) + List wordsList = Arrays + .stream(s.split(" ")) + .filter(si -> si.length() > 3) + .collect(Collectors.toList()); + + final int words = wordsList.size(); + final int letters = s.length(); + + // create the prefix: (number of words / mod) followed by "-"; the letter count is not part of it + String prefix = words / mod + "-"; + + return doSuffixPrefixChain(wordsList, prefix); + + } + + private Collection doSuffixPrefixChain(List wordsList, String prefix) { + + 
Set set = Sets.newLinkedHashSet(); + switch (wordsList.size()) { + case 0: + break; + case 1: + set.add(wordsList.get(0)); + break; + case 2: + set + .add( + prefix + + suffix(wordsList.get(0), 3) + + prefix(wordsList.get(1), 3)); + + set + .add( + prefix + + prefix(wordsList.get(0), 3) + + suffix(wordsList.get(1), 3)); + + break; + default: + set + .add( + prefix + + suffix(wordsList.get(0), 3) + + prefix(wordsList.get(1), 3) + + suffix(wordsList.get(2), 3)); + + set + .add( + prefix + + prefix(wordsList.get(0), 3) + + suffix(wordsList.get(1), 3) + + prefix(wordsList.get(2), 3)); + break; + } + + return set; + + } + + private String suffix(String s, int len) { + return s.substring(s.length() - len); + } + + private String prefix(String s, int len) { + return s.substring(0, len); + } + +} diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java index b4a04ce65..91b51bebb 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonClustering.java @@ -17,11 +17,11 @@ import eu.dnetlib.pace.model.Person; @ClusteringClass("personClustering") public class PersonClustering extends AbstractPaceFunctions implements ClusteringFunction { - private Map params; + private Map params; private static final int MAX_TOKENS = 5; - public PersonClustering(final Map params) { + public PersonClustering(final Map params) { this.params = params; } @@ -77,7 +77,7 @@ public class PersonClustering extends AbstractPaceFunctions implements Clusterin // } @Override - public Map getParams() { + public Map getParams() { return params; } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java index a3d58a9be..09a112c37 100644 --- 
a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/PersonHash.java @@ -15,7 +15,7 @@ public class PersonHash extends AbstractClusteringFunction { private boolean DEFAULT_AGGRESSIVE = false; - public PersonHash(final Map params) { + public PersonHash(final Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java index 2aab926da..3733dfc74 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/RandomClusteringFunction.java @@ -8,7 +8,7 @@ import eu.dnetlib.pace.config.Config; public class RandomClusteringFunction extends AbstractClusteringFunction { - public RandomClusteringFunction(Map params) { + public RandomClusteringFunction(Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java index b085ae26d..ca1b4189b 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SortedNgramPairs.java @@ -1,7 +1,10 @@ package eu.dnetlib.pace.clustering; -import java.util.*; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; import com.google.common.base.Joiner; import com.google.common.base.Splitter; @@ -12,7 +15,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("sortedngrampairs") public class SortedNgramPairs extends NgramPairs { - public SortedNgramPairs(Map params) { + public SortedNgramPairs(Map params) { super(params, false); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java 
b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java index 392aecc79..048380f7e 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SpaceTrimmingFieldValue.java @@ -15,7 +15,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("spacetrimmingfieldvalue") public class SpaceTrimmingFieldValue extends AbstractClusteringFunction { - public SpaceTrimmingFieldValue(final Map params) { + public SpaceTrimmingFieldValue(final Map params) { super(params); } @@ -25,7 +25,7 @@ public class SpaceTrimmingFieldValue extends AbstractClusteringFunction { res .add( - StringUtils.isBlank(s) ? RandomStringUtils.random(getParams().get("randomLength")) + StringUtils.isBlank(s) ? RandomStringUtils.random(param("randomLength")) : s.toLowerCase().replaceAll("\\s+", "")); return res; diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java index 2a1c023a9..b6921e9f1 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/SuffixPrefix.java @@ -12,7 +12,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("suffixprefix") public class SuffixPrefix extends AbstractClusteringFunction { - public SuffixPrefix(Map params) { + public SuffixPrefix(Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java index 5b267ad10..34f41085b 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/UrlClustering.java @@ -15,12 +15,17 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("urlclustering") public class UrlClustering extends 
AbstractPaceFunctions implements ClusteringFunction { - protected Map params; + protected Map params; - public UrlClustering(final Map params) { + public UrlClustering(final Map params) { this.params = params; } + @Override + public Map getParams() { + return params; + } + @Override public Collection apply(final Config conf, List fields) { try { @@ -35,11 +40,6 @@ public class UrlClustering extends AbstractPaceFunctions implements ClusteringFu } } - @Override - public Map getParams() { - return null; - } - private URL asUrl(String value) { try { return new URL(value); diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsStatsSuffixPrefixChain.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsStatsSuffixPrefixChain.java index c8e02f8f0..22351cf8f 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsStatsSuffixPrefixChain.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsStatsSuffixPrefixChain.java @@ -11,7 +11,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("wordsStatsSuffixPrefixChain") public class WordsStatsSuffixPrefixChain extends AbstractClusteringFunction { - public WordsStatsSuffixPrefixChain(Map params) { + public WordsStatsSuffixPrefixChain(Map params) { super(params); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java index e606590a5..f9fef376b 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/WordsSuffixPrefix.java @@ -12,7 +12,7 @@ import eu.dnetlib.pace.config.Config; @ClusteringClass("wordssuffixprefix") public class WordsSuffixPrefix extends AbstractClusteringFunction { - public WordsSuffixPrefix(Map params) { + public WordsSuffixPrefix(Map params) { super(params); } diff --git 
a/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java index b440686de..ba7639ada 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java @@ -16,7 +16,6 @@ import org.apache.commons.lang3.StringUtils; import com.google.common.base.Joiner; import com.google.common.base.Splitter; import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.ibm.icu.text.Transliterator; @@ -27,7 +26,7 @@ import eu.dnetlib.pace.clustering.NGramUtils; * * @author claudio */ -public abstract class AbstractPaceFunctions { +public class AbstractPaceFunctions { // city map to be used when translating the city names into codes private static Map cityMap = AbstractPaceFunctions @@ -62,11 +61,14 @@ public abstract class AbstractPaceFunctions { private static Pattern hexUnicodePattern = Pattern.compile("\\\\u(\\p{XDigit}{4})"); - protected String concat(final List l) { + private static Pattern romanNumberPattern = Pattern + .compile("^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$"); + + protected static String concat(final List l) { return Joiner.on(" ").skipNulls().join(l); } - protected String cleanup(final String s) { + public static String cleanup(final String s) { final String s1 = HTML_REGEX.matcher(s).replaceAll(""); final String s2 = unicodeNormalization(s1.toLowerCase()); final String s3 = nfd(s2); @@ -82,7 +84,7 @@ public abstract class AbstractPaceFunctions { return s12; } - protected String fixXML(final String a) { + protected static String fixXML(final String a) { return a .replaceAll("–", " ") @@ -91,7 +93,7 @@ public abstract class AbstractPaceFunctions { .replaceAll("−", " "); } - protected boolean checkNumbers(final String a, final String b) { + protected static boolean 
checkNumbers(final String a, final String b) { final String numbersA = getNumbers(a); final String numbersB = getNumbers(b); final String romansA = getRomans(a); @@ -99,7 +101,7 @@ public abstract class AbstractPaceFunctions { return !numbersA.equals(numbersB) || !romansA.equals(romansB); } - protected String getRomans(final String s) { + protected static String getRomans(final String s) { final StringBuilder sb = new StringBuilder(); for (final String t : s.split(" ")) { sb.append(isRoman(t) ? t : ""); @@ -107,13 +109,12 @@ public abstract class AbstractPaceFunctions { return sb.toString(); } - protected boolean isRoman(final String s) { - return s - .replaceAll("^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$", "qwertyuiop") - .equals("qwertyuiop"); + protected static boolean isRoman(final String s) { + Matcher m = romanNumberPattern.matcher(s); + return m.matches() && m.hitEnd(); } - protected String getNumbers(final String s) { + protected static String getNumbers(final String s) { final StringBuilder sb = new StringBuilder(); for (final String t : s.split(" ")) { sb.append(isNumber(t) ? 
t : ""); @@ -121,7 +122,7 @@ public abstract class AbstractPaceFunctions { return sb.toString(); } - public boolean isNumber(String strNum) { + public static boolean isNumber(String strNum) { if (strNum == null) { return false; } @@ -147,7 +148,7 @@ public abstract class AbstractPaceFunctions { } } - protected String removeSymbols(final String s) { + protected static String removeSymbols(final String s) { final StringBuilder sb = new StringBuilder(); s.chars().forEach(ch -> { @@ -157,11 +158,11 @@ public abstract class AbstractPaceFunctions { return sb.toString().replaceAll("\\s+", " "); } - protected boolean notNull(final String s) { + protected static boolean notNull(final String s) { return s != null; } - protected String normalize(final String s) { + public static String normalize(final String s) { return fixAliases(transliterate(nfd(unicodeNormalization(s)))) .toLowerCase() // do not compact the regexes in a single expression, would cause StackOverflowError in case of large input @@ -174,16 +175,16 @@ public abstract class AbstractPaceFunctions { .trim(); } - public String nfd(final String s) { + public static String nfd(final String s) { return Normalizer.normalize(s, Normalizer.Form.NFD); } - public String utf8(final String s) { + public static String utf8(final String s) { byte[] bytes = s.getBytes(StandardCharsets.UTF_8); return new String(bytes, StandardCharsets.UTF_8); } - public String unicodeNormalization(final String s) { + public static String unicodeNormalization(final String s) { Matcher m = hexUnicodePattern.matcher(s); StringBuffer buf = new StringBuffer(s.length()); @@ -195,7 +196,7 @@ public abstract class AbstractPaceFunctions { return buf.toString(); } - protected String filterStopWords(final String s, final Set stopwords) { + protected static String filterStopWords(final String s, final Set stopwords) { final StringTokenizer st = new StringTokenizer(s); final StringBuilder sb = new StringBuilder(); while (st.hasMoreTokens()) { @@ -208,7 
+209,7 @@ public abstract class AbstractPaceFunctions { return sb.toString().trim(); } - public String filterAllStopWords(String s) { + public static String filterAllStopWords(String s) { s = filterStopWords(s, stopwords_en); s = filterStopWords(s, stopwords_de); @@ -221,7 +222,8 @@ public abstract class AbstractPaceFunctions { return s; } - protected Collection filterBlacklisted(final Collection set, final Set ngramBlacklist) { + protected static Collection filterBlacklisted(final Collection set, + final Set ngramBlacklist) { final Set newset = Sets.newLinkedHashSet(); for (final String s : set) { if (!ngramBlacklist.contains(s)) { @@ -268,7 +270,7 @@ public abstract class AbstractPaceFunctions { return m; } - public String removeKeywords(String s, Set keywords) { + public static String removeKeywords(String s, Set keywords) { s = " " + s + " "; for (String k : keywords) { @@ -278,39 +280,39 @@ public abstract class AbstractPaceFunctions { return s.trim(); } - public double commonElementsPercentage(Set s1, Set s2) { + public static double commonElementsPercentage(Set s1, Set s2) { double longer = Math.max(s1.size(), s2.size()); return (double) s1.stream().filter(s2::contains).count() / longer; } // convert the set of keywords to codes - public Set toCodes(Set keywords, Map translationMap) { + public static Set toCodes(Set keywords, Map translationMap) { return keywords.stream().map(s -> translationMap.get(s)).collect(Collectors.toSet()); } - public Set keywordsToCodes(Set keywords, Map translationMap) { + public static Set keywordsToCodes(Set keywords, Map translationMap) { return toCodes(keywords, translationMap); } - public Set citiesToCodes(Set keywords) { + public static Set citiesToCodes(Set keywords) { return toCodes(keywords, cityMap); } - protected String firstLC(final String s) { + protected static String firstLC(final String s) { return StringUtils.substring(s, 0, 1).toLowerCase(); } - protected Iterable tokens(final String s, final int maxTokens) { + 
protected static Iterable tokens(final String s, final int maxTokens) { return Iterables.limit(Splitter.on(" ").omitEmptyStrings().trimResults().split(s), maxTokens); } - public String normalizePid(String pid) { + public static String normalizePid(String pid) { return DOI_PREFIX.matcher(pid.toLowerCase()).replaceAll(""); } // get the list of keywords into the input string - public Set getKeywords(String s1, Map translationMap, int windowSize) { + public static Set getKeywords(String s1, Map translationMap, int windowSize) { String s = s1; @@ -340,7 +342,7 @@ public abstract class AbstractPaceFunctions { return codes; } - public Set getCities(String s1, int windowSize) { + public static Set getCities(String s1, int windowSize) { return getKeywords(s1, cityMap, windowSize); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java index d9ad81d42..5ede2c380 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/ClusteringDef.java @@ -18,7 +18,7 @@ public class ClusteringDef implements Serializable { private List fields; - private Map params; + private Map params; public ClusteringDef() { } @@ -43,11 +43,11 @@ public class ClusteringDef implements Serializable { this.fields = fields; } - public Map getParams() { + public Map getParams() { return params; } - public void setParams(final Map params) { + public void setParams(final Map params) { this.params = params; } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java index f34545e6d..7ad9b7445 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java @@ -2,6 +2,7 @@ package eu.dnetlib.pace.model; import java.io.Serializable; +import java.util.HashSet; import 
java.util.List; import com.fasterxml.jackson.core.JsonProcessingException; @@ -36,6 +37,16 @@ public class FieldDef implements Serializable { */ private int length = -1; + private HashSet filter; + + private boolean sorted; + + public boolean isSorted() { + return sorted; + } + + private String clean; + public FieldDef() { } @@ -91,6 +102,30 @@ public class FieldDef implements Serializable { this.path = path; } + public HashSet getFilter() { + return filter; + } + + public void setFilter(HashSet filter) { + this.filter = filter; + } + + public boolean getSorted() { + return sorted; + } + + public void setSorted(boolean sorted) { + this.sorted = sorted; + } + + public String getClean() { + return clean; + } + + public void setClean(String clean) { + this.clean = clean; + } + @Override public String toString() { try { diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkDeduper.scala b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkDeduper.scala index b3f56bcdb..bc702b9e2 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkDeduper.scala +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkDeduper.scala @@ -5,9 +5,9 @@ import eu.dnetlib.pace.util.{BlockProcessor, SparkReporter} import org.apache.spark.SparkContext import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.expressions._ -import org.apache.spark.sql.functions.{col, lit, udf} +import org.apache.spark.sql.functions.{col, desc, expr, lit, udf} import org.apache.spark.sql.types._ -import org.apache.spark.sql.{Column, Dataset, Row, functions} +import org.apache.spark.sql.{Column, Dataset, Row, SaveMode, functions} import java.util.function.Predicate import java.util.stream.Collectors @@ -80,6 +80,8 @@ case class SparkDeduper(conf: DedupConfig) extends Serializable { .withColumn("key", functions.explode(clusterValuesUDF(cd).apply(functions.array(inputColumns: _*)))) // Add position column having the position of the row within the set of 
rows having the same key value ordered by the sorting value .withColumn("position", functions.row_number().over(Window.partitionBy("key").orderBy(col(model.orderingFieldName), col(model.identifierFieldName)))) + // .withColumn("count", functions.max("position").over(Window.partitionBy("key").orderBy(col(model.orderingFieldName), col(model.identifierFieldName)).rowsBetween(Window.unboundedPreceding,Window.unboundedFollowing) )) + // .filter("count > 1") if (df_with_clustering_keys == null) df_with_clustering_keys = ds @@ -88,20 +90,44 @@ case class SparkDeduper(conf: DedupConfig) extends Serializable { } //TODO: analytics + /*df_with_clustering_keys.groupBy(col("clustering"), col("key")) + .agg(expr("max(count) AS size")) + .orderBy(desc("size")) + .show*/ val df_with_blocks = df_with_clustering_keys - // filter out rows with position exceeding the maxqueuesize parameter - .filter(col("position").leq(conf.getWf.getQueueMaxSize)) - .groupBy("clustering", "key") + // split the clustering block into smaller blocks of queuemaxsize + .groupBy(col("clustering"), col("key"), functions.floor(col("position").divide(lit(conf.getWf.getQueueMaxSize)))) .agg(functions.collect_set(functions.struct(model.schema.fieldNames.map(col): _*)).as("block")) .filter(functions.size(new Column("block")).gt(1)) + .union( + //adjacency blocks + df_with_clustering_keys + // filter out leading and trailing elements + .filter(col("position").gt(conf.getWf.getSlidingWindowSize/2)) + //.filter(col("position").lt(col("count").minus(conf.getWf.getSlidingWindowSize/2))) + // create small blocks of records on "the border" of maxqueuesize: getSlidingWindowSize/2 elements before and after + .filter( + col("position").mod(conf.getWf.getQueueMaxSize).lt(conf.getWf.getSlidingWindowSize/2) // slice of the start of block + || col("position").mod(conf.getWf.getQueueMaxSize).gt(conf.getWf.getQueueMaxSize - (conf.getWf.getSlidingWindowSize/2)) //slice of the end of the block + ) + .groupBy(col("clustering"), 
col("key"), functions.floor((col("position") + lit(conf.getWf.getSlidingWindowSize/2)).divide(lit(conf.getWf.getQueueMaxSize)))) + .agg(functions.collect_set(functions.struct(model.schema.fieldNames.map(col): _*)).as("block")) + .filter(functions.size(new Column("block")).gt(1)) + ) df_with_blocks } def clusterValuesUDF(cd: ClusteringDef) = { udf[mutable.WrappedArray[String], mutable.WrappedArray[Any]](values => { - values.flatMap(f => cd.clusteringFunction().apply(conf, Seq(f.toString).asJava).asScala) + val valueList = values.flatMap { + case a: mutable.WrappedArray[Any] => a.map(_.toString) + case s: Any => Seq(s.toString) + }.asJava; + + mutable.WrappedArray.make(cd.clusteringFunction().apply(conf, valueList).toArray()) + }) } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala index aa997c6e9..aa04188da 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala @@ -1,13 +1,16 @@ package eu.dnetlib.pace.model import com.jayway.jsonpath.{Configuration, JsonPath} +import eu.dnetlib.pace.common.AbstractPaceFunctions import eu.dnetlib.pace.config.{DedupConfig, Type} import eu.dnetlib.pace.util.MapDocumentUtil +import org.apache.commons.lang3.StringUtils import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.types.{DataTypes, Metadata, StructField, StructType} import org.apache.spark.sql.{Dataset, Row} +import java.util.Locale import java.util.regex.Pattern import scala.collection.JavaConverters._ @@ -60,7 +63,7 @@ case class SparkModel(conf: DedupConfig) { values(identityFieldPosition) = MapDocumentUtil.getJPathString(conf.getWf.getIdPath, documentContext) schema.fieldNames.zipWithIndex.foldLeft(values) { - case ((res, (fname, index))) => { + case ((res, (fname, index))) => val 
fdef = conf.getPace.getModelMap.get(fname) if (fdef != null) { @@ -96,13 +99,52 @@ case class SparkModel(conf: DedupConfig) { case Type.DoubleArray => MapDocumentUtil.getJPathArray(fdef.getPath, json) } + + val filter = fdef.getFilter + + if (StringUtils.isNotBlank(fdef.getClean)) { + res(index) = res(index) match { + case x: Seq[String] => x.map(clean(_, fdef.getClean)).toSeq + case _ => clean(res(index).toString, fdef.getClean) + } + } + + if (filter != null && !filter.isEmpty) { + res(index) = res(index) match { + case x: String if filter.contains(x.toLowerCase(Locale.ROOT)) => null + case x: Seq[String] => x.filter(s => !filter.contains(s.toLowerCase(Locale.ROOT))).toSeq + case _ => res(index) + } + } + + if (fdef.getSorted) { + res(index) = res(index) match { + case x: Seq[String] => x.sorted.toSeq + case _ => res(index) + } + } } res - } } new GenericRowWithSchema(values, schema) } + + def clean(value: String, cleantype: String) : String = { + val res = cleantype match { + case "title" => AbstractPaceFunctions.cleanup(value) + case _ => value + } + +// if (!res.equals(AbstractPaceFunctions.normalize(value))) { +// println(res) +// println(AbstractPaceFunctions.normalize(value)) +// println() +// } + + res + } + } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java index 5c6939e60..edad0ae2e 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/AuthorsMatch.java @@ -23,7 +23,6 @@ public class AuthorsMatch extends AbstractListComparator { private String MODE; // full or surname private int SIZE_THRESHOLD; private String TYPE; // count or percentage - private int common; public AuthorsMatch(Map params) { super(params, new com.wcohen.ss.JaroWinkler()); @@ -35,7 +34,6 @@ public class AuthorsMatch extends AbstractListComparator { FULLNAME_THRESHOLD = 
Double.parseDouble(params.getOrDefault("fullname_th", "0.9")); SIZE_THRESHOLD = Integer.parseInt(params.getOrDefault("size_th", "20")); TYPE = params.getOrDefault("type", "percentage"); - common = 0; } protected AuthorsMatch(double w, AbstractStringDistance ssalgo) { @@ -44,22 +42,27 @@ public class AuthorsMatch extends AbstractListComparator { @Override public double compare(final List a, final List b, final Config conf) { - if (a.isEmpty() || b.isEmpty()) return -1; if (a.size() > SIZE_THRESHOLD || b.size() > SIZE_THRESHOLD) return 1.0; - List aList = a.stream().map(author -> new Person(author, false)).collect(Collectors.toList()); + int maxMiss = Integer.MAX_VALUE; List bList = b.stream().map(author -> new Person(author, false)).collect(Collectors.toList()); - common = 0; + Double threshold = getDoubleParam("threshold"); + + if (threshold != null && threshold >= 0.0 && threshold <= 1.0 && a.size() == b.size()) { + maxMiss = (int) Math.floor((1 - threshold) * Math.max(a.size(), b.size())); + } + + int common = 0; // compare each element of List1 with each element of List2 - for (Person p1 : aList) + for (int i = 0; i < a.size(); i++) { + Person p1 = new Person(a.get(i), false); for (Person p2 : bList) { - // both persons are inaccurate if (!p1.isAccurate() && !p2.isAccurate()) { // compare just normalized fullnames @@ -118,11 +121,15 @@ public class AuthorsMatch extends AbstractListComparator { } } - } + if (i - common > maxMiss) { + return 0.0; + } + } + // normalization factor to compute the score - int normFactor = aList.size() == bList.size() ? aList.size() : (aList.size() + bList.size() - common); + int normFactor = a.size() == b.size() ? 
a.size() : (a.size() + b.size() - common); if (TYPE.equals("percentage")) { return (double) common / normFactor; diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/InstanceTypeMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/InstanceTypeMatch.java index 238cb16ce..34ebcf7a7 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/InstanceTypeMatch.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/InstanceTypeMatch.java @@ -25,6 +25,7 @@ public class InstanceTypeMatch extends AbstractListComparator { translationMap.put("Conference object", "*"); translationMap.put("Other literature type", "*"); translationMap.put("Unknown", "*"); + translationMap.put("UNKNOWN", "*"); // article types translationMap.put("Article", "Article"); @@ -76,5 +77,4 @@ public class InstanceTypeMatch extends AbstractListComparator { protected double normalize(final double d) { return d; } - } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/LevensteinTitle.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/LevensteinTitle.java index 877cb95ab..e2ee062b5 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/LevensteinTitle.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/LevensteinTitle.java @@ -3,6 +3,7 @@ package eu.dnetlib.pace.tree; import java.util.Map; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -30,16 +31,25 @@ public class LevensteinTitle extends AbstractStringComparator { } @Override - public double distance(final String a, final String b, final Config conf) { - final String ca = cleanup(a); - final String cb = cleanup(b); - + public double distance(final String ca, final String cb, final Config conf) { final boolean check = checkNumbers(ca, cb); if (check) return 0.5; - return normalize(ssalgo.score(ca, cb), ca.length(), cb.length()); + Double threshold = getDoubleParam("threshold"); + + // reduce Levenshtein algo 
complexity when target threshold is known + if (threshold != null && threshold >= 0.0 && threshold <= 1.0) { + int maxdistance = (int) Math.floor((1 - threshold) * Math.max(ca.length(), cb.length())); + int score = StringUtils.getLevenshteinDistance(ca, cb, maxdistance); + if (score == -1) { + return 0; + } + return normalize(score, ca.length(), cb.length()); + } else { + return normalize(StringUtils.getLevenshteinDistance(ca, cb), ca.length(), cb.length()); + } } private double normalize(final double score, final int la, final int lb) { diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/MaxLengthMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/MaxLengthMatch.java new file mode 100644 index 000000000..8f525c6d5 --- /dev/null +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/MaxLengthMatch.java @@ -0,0 +1,29 @@ + +package eu.dnetlib.pace.tree; + +import java.util.Map; + +import eu.dnetlib.pace.config.Config; +import eu.dnetlib.pace.tree.support.AbstractStringComparator; +import eu.dnetlib.pace.tree.support.ComparatorClass; + +@ComparatorClass("maxLengthMatch") +public class MaxLengthMatch extends AbstractStringComparator { + + private final int limit; + + public MaxLengthMatch(Map params) { + super(params); + + limit = Integer.parseInt(params.getOrDefault("limit", "200")); + } + + @Override + public double compare(String a, String b, final Config conf) { + return a.length() < limit && b.length() < limit ? 
1.0 : -1.0; + } + + protected String toString(final Object object) { + return toFirstString(object); + } +} diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/AbstractComparator.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/AbstractComparator.java index 8a957c5e3..cde73fd2b 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/AbstractComparator.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/AbstractComparator.java @@ -127,4 +127,14 @@ public abstract class AbstractComparator extends AbstractPaceFunctions implem return this.weight; } + public Double getDoubleParam(String name) { + String svalue = params.get(name); + + try { + return Double.parseDouble(svalue); + } catch (Throwable t) { + } + + return null; + } } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java index c2b0ddda7..177ad73df 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/BlockProcessor.java @@ -67,8 +67,10 @@ public class BlockProcessor { private void processRows(final List queue, final Reporter context) { - for (int pivotPos = 0; pivotPos < queue.size(); pivotPos++) { - final Row pivot = queue.get(pivotPos); + IncrementalConnectedComponents icc = new IncrementalConnectedComponents(queue.size()); + + for (int i = 0; i < queue.size(); i++) { + final Row pivot = queue.get(i); final String idPivot = pivot.getString(identifierFieldPos); // identifier final Object fieldsPivot = getJavaValue(pivot, orderFieldPos); @@ -76,9 +78,9 @@ public class BlockProcessor { final WfConfig wf = dedupConf.getWf(); if (fieldPivot != null) { - int i = 0; - for (int windowPos = pivotPos + 1; windowPos < queue.size(); windowPos++) { - final Row curr = queue.get(windowPos); + for (int j = icc.nextUnconnected(i, i + 1); j >= 0 + && j < queue.size(); j = 
icc.nextUnconnected(i, j + 1)) { + final Row curr = queue.get(j); final String idCurr = curr.getString(identifierFieldPos); // identifier if (mustSkip(idCurr)) { @@ -86,7 +88,7 @@ public class BlockProcessor { break; } - if (++i > wf.getSlidingWindowSize()) { + if (wf.getSlidingWindowSize() > 0 && (j - i) > wf.getSlidingWindowSize()) { break; } @@ -97,7 +99,9 @@ public class BlockProcessor { final TreeProcessor treeProcessor = new TreeProcessor(dedupConf); - emitOutput(treeProcessor.compare(pivot, curr), idPivot, idCurr, context); + if (emitOutput(treeProcessor.compare(pivot, curr), idPivot, idCurr, context)) { + icc.connect(i, j); + } } } } @@ -115,7 +119,8 @@ public class BlockProcessor { return null; } - private void emitOutput(final boolean result, final String idPivot, final String idCurr, final Reporter context) { + private boolean emitOutput(final boolean result, final String idPivot, final String idCurr, + final Reporter context) { if (result) { if (idPivot.compareTo(idCurr) <= 0) { @@ -127,6 +132,8 @@ public class BlockProcessor { } else { context.incrementCounter(dedupConf.getWf().getEntityType(), "d < " + dedupConf.getWf().getThreshold(), 1); } + + return result; } private boolean mustSkip(final String idPivot) { @@ -142,5 +149,4 @@ public class BlockProcessor { context.emit(type, from, to); } - } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/IncrementalConnectedComponents.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/IncrementalConnectedComponents.java new file mode 100644 index 000000000..ed35239a8 --- /dev/null +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/IncrementalConnectedComponents.java @@ -0,0 +1,50 @@ + +package eu.dnetlib.pace.util; + +import java.util.BitSet; + +public class IncrementalConnectedComponents { + final private int size; + + final private BitSet[] indexes; + + IncrementalConnectedComponents(int size) { + this.size = size; + this.indexes = new BitSet[size]; + } + + public void connect(int i, int 
j) { + if (indexes[i] == null) { + if (indexes[j] == null) { + indexes[i] = new BitSet(size); + } else { + indexes[i] = indexes[j]; + } + } else { + if (indexes[j] != null && indexes[i] != indexes[j]) { + // merge adjacency lists for i and j + indexes[i].or(indexes[j]); + } + } + + indexes[i].set(i); + indexes[i].set(j); + indexes[j] = indexes[i]; + } + + public int nextUnconnected(int i, int j) { + if (indexes[i] == null) { + return j; + } + int result = indexes[i].nextClearBit(j); + + return (result >= size) ? -1 : result; + } + + public BitSet getConnections(int i) { + if (indexes[i] == null) { + return null; + } + return indexes[i]; + } +} diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java index 28244cb3b..7dc340663 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/MapDocumentUtil.java @@ -97,6 +97,8 @@ public class MapDocumentUtil { Object o = json.read(jsonPath); if (o instanceof String) return (String) o; + if (o instanceof Number) + return (String) o.toString(); if (o instanceof JSONArray && ((JSONArray) o).size() > 0) return (String) ((JSONArray) o).get(0); return ""; diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/PaceResolver.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/PaceResolver.java index 252205c79..746892f0c 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/PaceResolver.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/util/PaceResolver.java @@ -40,7 +40,7 @@ public class PaceResolver implements Serializable { Collectors.toMap(cl -> cl.getAnnotation(ComparatorClass.class).value(), cl -> (Class) cl)); } - public ClusteringFunction getClusteringFunction(String name, Map params) throws PaceException { + public ClusteringFunction getClusteringFunction(String name, Map params) throws PaceException { try { return 
clusteringFunctions.get(name).getDeclaredConstructor(Map.class).newInstance(params); } catch (InstantiationException | IllegalAccessException | InvocationTargetException diff --git a/dhp-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java b/dhp-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java index f9a1ea9e2..80e349a3f 100644 --- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java +++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java @@ -15,7 +15,7 @@ import eu.dnetlib.pace.config.DedupConfig; public class ClusteringFunctionTest extends AbstractPaceTest { - private static Map params; + private static Map params; private static DedupConfig conf; @BeforeAll @@ -40,10 +40,10 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testNgram() { - params.put("ngramLen", 3); - params.put("max", 8); - params.put("maxPerToken", 2); - params.put("minNgramLen", 1); + params.put("ngramLen", "3"); + params.put("max", "8"); + params.put("maxPerToken", "2"); + params.put("minNgramLen", "1"); final ClusteringFunction ngram = new Ngrams(params); @@ -54,8 +54,8 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testNgramPairs() { - params.put("ngramLen", 3); - params.put("max", 2); + params.put("ngramLen", "3"); + params.put("max", "2"); final ClusteringFunction np = new NgramPairs(params); @@ -66,8 +66,8 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testSortedNgramPairs() { - params.put("ngramLen", 3); - params.put("max", 2); + params.put("ngramLen", "3"); + params.put("max", "2"); final ClusteringFunction np = new SortedNgramPairs(params); @@ -87,9 +87,9 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testAcronym() { - params.put("max", 4); - params.put("minLen", 1); - params.put("maxLen", 3); + params.put("max", "4"); + 
params.put("minLen", "1"); + params.put("maxLen", "3"); final ClusteringFunction acro = new Acronyms(params); @@ -100,8 +100,8 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testSuffixPrefix() { - params.put("len", 3); - params.put("max", 4); + params.put("len", "3"); + params.put("max", "4"); final ClusteringFunction sp = new SuffixPrefix(params); @@ -109,8 +109,8 @@ public class ClusteringFunctionTest extends AbstractPaceTest { System.out.println(s); System.out.println(sp.apply(conf, Lists.newArrayList(s))); - params.put("len", 3); - params.put("max", 1); + params.put("len", "3"); + params.put("max", "1"); System.out.println(sp.apply(conf, Lists.newArrayList("Framework for general-purpose deduplication"))); } @@ -118,8 +118,8 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testWordsSuffixPrefix() { - params.put("len", 3); - params.put("max", 4); + params.put("len", "3"); + params.put("max", "4"); final ClusteringFunction sp = new WordsSuffixPrefix(params); @@ -130,7 +130,7 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testWordsStatsSuffixPrefix() { - params.put("mod", 10); + params.put("mod", "10"); final ClusteringFunction sp = new WordsStatsSuffixPrefixChain(params); @@ -167,7 +167,7 @@ public class ClusteringFunctionTest extends AbstractPaceTest { @Test public void testFieldValue() { - params.put("randomLength", 5); + params.put("randomLength", "5"); final ClusteringFunction sp = new SpaceTrimmingFieldValue(params); diff --git a/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/IncrementalConnectedComponentsTest.java b/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/IncrementalConnectedComponentsTest.java new file mode 100644 index 000000000..b0f105d7c --- /dev/null +++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/IncrementalConnectedComponentsTest.java @@ -0,0 +1,40 @@ + +package eu.dnetlib.pace.util; + +import static 
org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; + +import org.junit.jupiter.api.Test; + +public class IncrementalConnectedComponentsTest { + + @Test + public void transitiveClosureTest() { + IncrementalConnectedComponents icc = new IncrementalConnectedComponents(10); + + icc.connect(0, 1); + icc.connect(0, 2); + icc.connect(0, 3); + + icc.connect(1, 2); + icc.connect(1, 4); + icc.connect(1, 5); + + icc.connect(6, 7); + icc.connect(6, 9); + + assertEquals(icc.getConnections(0).toString(), "{0, 1, 2, 3, 4, 5}"); + assertEquals(icc.getConnections(1).toString(), "{0, 1, 2, 3, 4, 5}"); + assertEquals(icc.getConnections(2).toString(), "{0, 1, 2, 3, 4, 5}"); + assertEquals(icc.getConnections(3).toString(), "{0, 1, 2, 3, 4, 5}"); + assertEquals(icc.getConnections(4).toString(), "{0, 1, 2, 3, 4, 5}"); + assertEquals(icc.getConnections(5).toString(), "{0, 1, 2, 3, 4, 5}"); + + assertEquals(icc.getConnections(6).toString(), "{6, 7, 9}"); + assertEquals(icc.getConnections(7).toString(), "{6, 7, 9}"); + assertEquals(icc.getConnections(9).toString(), "{6, 7, 9}"); + + assertNull(icc.getConnections(8)); + } + +} diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java new file mode 100644 index 000000000..8fb9c8c95 --- /dev/null +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteAction.java @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2024. 
+ * SPDX-FileCopyrightText: © 2023 Consiglio Nazionale delle Ricerche + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +package eu.dnetlib.dhp.actionmanager.promote; + +/** Encodes the Actionset promotion strategies */ +public class PromoteAction { + + /** The supported actionset promotion strategies + * + * ENRICH: promotes only records in the actionset matching another record in the + * graph and enriches them applying the given MergeAndGet strategy + * UPSERT: promotes all the records in an actionset, matching records are updated + * using the given MergeAndGet strategy, the non-matching record as inserted as they are. + */ + public enum Strategy { + ENRICH, UPSERT + } + + /** + * Returns the string representation of the join type implementing the given PromoteAction. + * + * @param strategy the strategy to be used to promote the Actionset contents + * @return the join type used to implement the promotion strategy + */ + public static String joinTypeForStrategy(PromoteAction.Strategy strategy) { + switch (strategy) { + case ENRICH: + return "left_outer"; + case UPSERT: + return "full_outer"; + default: + throw new IllegalStateException("unsupported PromoteAction: " + strategy.toString()); + } + } +} diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java index 7b024bea8..56cbda4d6 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadForGraphTableJob.java @@ -67,8 +67,9 @@ public class PromoteActionPayloadForGraphTableJob { String outputGraphTablePath = parser.get("outputGraphTablePath"); logger.info("outputGraphTablePath: {}", outputGraphTablePath); - 
MergeAndGet.Strategy strategy = MergeAndGet.Strategy.valueOf(parser.get("mergeAndGetStrategy").toUpperCase()); - logger.info("strategy: {}", strategy); + MergeAndGet.Strategy mergeAndGetStrategy = MergeAndGet.Strategy + .valueOf(parser.get("mergeAndGetStrategy").toUpperCase()); + logger.info("mergeAndGetStrategy: {}", mergeAndGetStrategy); Boolean shouldGroupById = Optional .ofNullable(parser.get("shouldGroupById")) @@ -76,6 +77,12 @@ public class PromoteActionPayloadForGraphTableJob { .orElse(true); logger.info("shouldGroupById: {}", shouldGroupById); + PromoteAction.Strategy promoteActionStrategy = Optional + .ofNullable(parser.get("promoteActionStrategy")) + .map(PromoteAction.Strategy::valueOf) + .orElse(PromoteAction.Strategy.UPSERT); + logger.info("promoteActionStrategy: {}", promoteActionStrategy); + @SuppressWarnings("unchecked") Class rowClazz = (Class) Class.forName(graphTableClassName); @SuppressWarnings("unchecked") @@ -97,7 +104,8 @@ public class PromoteActionPayloadForGraphTableJob { inputGraphTablePath, inputActionPayloadPath, outputGraphTablePath, - strategy, + mergeAndGetStrategy, + promoteActionStrategy, rowClazz, actionPayloadClazz, shouldGroupById); @@ -124,14 +132,16 @@ public class PromoteActionPayloadForGraphTableJob { String inputGraphTablePath, String inputActionPayloadPath, String outputGraphTablePath, - MergeAndGet.Strategy strategy, + MergeAndGet.Strategy mergeAndGetStrategy, + PromoteAction.Strategy promoteActionStrategy, Class rowClazz, Class actionPayloadClazz, Boolean shouldGroupById) { Dataset rowDS = readGraphTable(spark, inputGraphTablePath, rowClazz); Dataset actionPayloadDS = readActionPayload(spark, inputActionPayloadPath, actionPayloadClazz); Dataset result = promoteActionPayloadForGraphTable( - rowDS, actionPayloadDS, strategy, rowClazz, actionPayloadClazz, shouldGroupById) + rowDS, actionPayloadDS, mergeAndGetStrategy, promoteActionStrategy, rowClazz, actionPayloadClazz, + shouldGroupById) .map((MapFunction) value -> value, 
Encoders.bean(rowClazz)); saveGraphTable(result, outputGraphTablePath); @@ -183,7 +193,8 @@ public class PromoteActionPayloadForGraphTableJob { private static Dataset promoteActionPayloadForGraphTable( Dataset rowDS, Dataset actionPayloadDS, - MergeAndGet.Strategy strategy, + MergeAndGet.Strategy mergeAndGetStrategy, + PromoteAction.Strategy promoteActionStrategy, Class rowClazz, Class actionPayloadClazz, Boolean shouldGroupById) { @@ -195,8 +206,9 @@ public class PromoteActionPayloadForGraphTableJob { SerializableSupplier> rowIdFn = ModelSupport::idFn; SerializableSupplier> actionPayloadIdFn = ModelSupport::idFn; - SerializableSupplier> mergeRowWithActionPayloadAndGetFn = MergeAndGet.functionFor(strategy); - SerializableSupplier> mergeRowsAndGetFn = MergeAndGet.functionFor(strategy); + SerializableSupplier> mergeRowWithActionPayloadAndGetFn = MergeAndGet + .functionFor(mergeAndGetStrategy); + SerializableSupplier> mergeRowsAndGetFn = MergeAndGet.functionFor(mergeAndGetStrategy); SerializableSupplier zeroFn = zeroFn(rowClazz); SerializableSupplier> isNotZeroFn = PromoteActionPayloadForGraphTableJob::isNotZeroFnUsingIdOrSourceAndTarget; @@ -207,6 +219,7 @@ public class PromoteActionPayloadForGraphTableJob { rowIdFn, actionPayloadIdFn, mergeRowWithActionPayloadAndGetFn, + promoteActionStrategy, rowClazz, actionPayloadClazz); diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java index d799c646b..f0b094240 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctions.java @@ -34,6 +34,7 @@ public class PromoteActionPayloadFunctions { * @param rowIdFn Function used to get the id of graph 
table row * @param actionPayloadIdFn Function used to get id of action payload instance * @param mergeAndGetFn Function used to merge graph table row and action payload instance + * @param promoteActionStrategy the Actionset promotion strategy * @param rowClazz Class of graph table * @param actionPayloadClazz Class of action payload * @param Type of graph table row @@ -46,6 +47,7 @@ public class PromoteActionPayloadFunctions { SerializableSupplier> rowIdFn, SerializableSupplier> actionPayloadIdFn, SerializableSupplier> mergeAndGetFn, + PromoteAction.Strategy promoteActionStrategy, Class rowClazz, Class actionPayloadClazz) { if (!isSubClass(rowClazz, actionPayloadClazz)) { @@ -61,7 +63,7 @@ public class PromoteActionPayloadFunctions { .joinWith( actionPayloadWithIdDS, rowWithIdDS.col("_1").equalTo(actionPayloadWithIdDS.col("_1")), - "full_outer") + PromoteAction.joinTypeForStrategy(promoteActionStrategy)) .map( (MapFunction, Tuple2>, G>) value -> { Optional rowOpt = Optional.ofNullable(value._1()).map(Tuple2::_2); diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json index 00c9404ef..81a7c77d7 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/promote/promote_action_payload_for_graph_table_input_parameters.json @@ -41,6 +41,12 @@ "paramDescription": "strategy for merging graph table objects with action payload instances, MERGE_FROM_AND_GET or SELECT_NEWER_AND_GET", "paramRequired": true }, + { + "paramName": "pas", + "paramLongName": "promoteActionStrategy", + "paramDescription": "strategy for promoting the actionset 
contents into the graph tables, ENRICH or UPSERT (default)", + "paramRequired": false + }, { "paramName": "sgid", "paramLongName": "shouldGroupById", diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml index 4f374a75a..5401b45ca 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/dataset/oozie_app/workflow.xml @@ -115,6 +115,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Dataset --outputGraphTablePath${workingDir}/dataset --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} @@ -167,6 +168,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Result --outputGraphTablePath${outputGraphRootPath}/dataset --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app/workflow.xml index c85ba4ac1..f9bd66ae3 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/datasource/oozie_app/workflow.xml @@ -106,6 +106,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Datasource --outputGraphTablePath${outputGraphRootPath}/datasource --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} diff --git 
a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app/workflow.xml index 412cad70b..ebfdeee31 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/organization/oozie_app/workflow.xml @@ -106,6 +106,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Organization --outputGraphTablePath${outputGraphRootPath}/organization --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml index 7bac760e2..02399ed9b 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/otherresearchproduct/oozie_app/workflow.xml @@ -114,6 +114,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct --outputGraphTablePath${workingDir}/otherresearchproduct --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} @@ -166,6 +167,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Result --outputGraphTablePath${outputGraphRootPath}/otherresearchproduct --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} diff --git 
a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/project/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/project/oozie_app/workflow.xml index daf48e9d7..57c2357b4 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/project/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/project/oozie_app/workflow.xml @@ -106,6 +106,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Project --outputGraphTablePath${outputGraphRootPath}/project --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml index b76dc82f1..92b114776 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/publication/oozie_app/workflow.xml @@ -115,6 +115,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Publication --outputGraphTablePath${workingDir}/publication --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} @@ -167,6 +168,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Result --outputGraphTablePath${outputGraphRootPath}/publication --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml 
b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml index d3086dbdc..e9e5f0b45 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/relation/oozie_app/workflow.xml @@ -107,6 +107,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Relation --outputGraphTablePath${outputGraphRootPath}/relation --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} diff --git a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml index b5673b18f..1d36ddf94 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-actionmanager/src/main/resources/eu/dnetlib/dhp/actionmanager/wf/software/oozie_app/workflow.xml @@ -114,6 +114,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Software --outputGraphTablePath${workingDir}/software --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} @@ -166,6 +167,7 @@ --actionPayloadClassNameeu.dnetlib.dhp.schema.oaf.Result --outputGraphTablePath${outputGraphRootPath}/software --mergeAndGetStrategy${mergeAndGetStrategy} + --promoteActionStrategy${promoteActionStrategy} --shouldGroupById${shouldGroupById} diff --git a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java index cbc1bfaba..777e2fa1c 100644 --- 
a/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java +++ b/dhp-workflows/dhp-actionmanager/src/test/java/eu/dnetlib/dhp/actionmanager/promote/PromoteActionPayloadFunctionsTest.java @@ -54,7 +54,7 @@ public class PromoteActionPayloadFunctionsTest { RuntimeException.class, () -> PromoteActionPayloadFunctions .joinGraphTableWithActionPayloadAndMerge( - null, null, null, null, null, OafImplSubSub.class, OafImpl.class)); + null, null, null, null, null, null, OafImplSubSub.class, OafImpl.class)); } @Test @@ -104,6 +104,7 @@ public class PromoteActionPayloadFunctionsTest { rowIdFn, actionPayloadIdFn, mergeAndGetFn, + PromoteAction.Strategy.UPSERT, OafImplSubSub.class, OafImplSubSub.class) .collectAsList(); @@ -183,6 +184,7 @@ public class PromoteActionPayloadFunctionsTest { rowIdFn, actionPayloadIdFn, mergeAndGetFn, + PromoteAction.Strategy.UPSERT, OafImplSubSub.class, OafImplSub.class) .collectAsList(); diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java index 57ad8b96a..b1ffe7f37 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareFOSSparkJob.java @@ -124,8 +124,19 @@ public class PrepareFOSSparkJob implements Serializable { FOSDataModel first) { level1.add(first.getLevel1()); level2.add(first.getLevel2()); - level3.add(first.getLevel3() + "@@" + first.getScoreL3()); - level4.add(first.getLevel4() + "@@" + first.getScoreL4()); + if (Optional.ofNullable(first.getLevel3()).isPresent() && + !first.getLevel3().equalsIgnoreCase(NA) && !first.getLevel3().equalsIgnoreCase(NULL) + && first.getLevel3() != null) + 
level3.add(first.getLevel3() + "@@" + first.getScoreL3()); + else + level3.add(NULL); + if (Optional.ofNullable(first.getLevel4()).isPresent() && + !first.getLevel4().equalsIgnoreCase(NA) && + !first.getLevel4().equalsIgnoreCase(NULL) && + first.getLevel4() != null) + level4.add(first.getLevel4() + "@@" + first.getScoreL4()); + else + level4.add(NULL); } } diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json index 8dc8a2aae..3ba83764d 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json @@ -2,7 +2,7 @@ {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": true}, {"paramName":"i", "paramLongName":"isLookupUrl", "paramDescription": "isLookupUrl", "paramRequired": true}, {"paramName":"w", "paramLongName":"workingPath", "paramDescription": "the path of the sequencial file to read", "paramRequired": true}, - {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the oaf path ", "paramRequired": true}, + {"paramName":"mo", "paramLongName":"mdstoreOutputVersion", "paramDescription": "the oaf path ", "paramRequired": true}, {"paramName":"s", "paramLongName":"skipUpdate", "paramDescription": "skip update ", "paramRequired": false}, {"paramName":"h", "paramLongName":"hdfsServerUri", "paramDescription": "the working path ", "paramRequired": true} ] \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml index 8915a090b..30eb41469 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/sx/bio/pubmed/oozie_app/workflow.xml @@ -1,4 +1,4 @@ - + baselineWorkingPath @@ -9,8 +9,12 @@ The IS lookUp service endopoint - targetPath - The target path + mdStoreOutputId + the identifier of the cleaned MDStore + + + mdStoreManagerURI + the path of the cleaned mdstore skipUpdate @@ -19,12 +23,31 @@ - + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionNEW_VERSION + --mdStoreID${mdStoreOutputId} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + yarn @@ -43,16 +66,52 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --workingPath${baselineWorkingPath} - --targetPath${targetPath} + --mdstoreOutputVersion${wf:actionData('StartTransaction')['mdStoreVersion']} --masteryarn --isLookupUrl${isLookupUrl} --hdfsServerUri${nameNode} --skipUpdate${skipUpdate} + + + + + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionCOMMIT + --namenode${nameNode} + --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + oozie.launcher.mapreduce.user.classpath.first + true + + + eu.dnetlib.dhp.aggregation.mdstore.MDStoreActionNode + --actionROLLBACK + --mdStoreVersion${wf:actionData('StartTransaction')['mdStoreVersion']} + --mdStoreManagerURI${mdStoreManagerURI} + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 8ac8b00bf..639918151 100644 --- 
a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -2,9 +2,12 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.collection.CollectionUtils +import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH} import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup +import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion import eu.dnetlib.dhp.schema.oaf.{Oaf, Result} import eu.dnetlib.dhp.sx.bio.pubmed._ +import eu.dnetlib.dhp.utils.DHPUtils.{MAPPER, writeHdfsFile} import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration @@ -164,11 +167,15 @@ object SparkCreateBaselineDataFrame { val workingPath = parser.get("workingPath") log.info("workingPath: {}", workingPath) - val targetPath = parser.get("targetPath") - log.info("targetPath: {}", targetPath) + val mdstoreOutputVersion = parser.get("mdstoreOutputVersion") + log.info("mdstoreOutputVersion: {}", mdstoreOutputVersion) + + val cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, classOf[MDStoreVersion]) + val outputBasePath = cleanedMdStoreVersion.getHdfsPath + log.info("outputBasePath: {}", outputBasePath) val hdfsServerUri = parser.get("hdfsServerUri") - log.info("hdfsServerUri: {}", targetPath) + log.info("hdfsServerUri: {}", hdfsServerUri) val skipUpdate = parser.get("skipUpdate") log.info("skipUpdate: {}", skipUpdate) @@ -216,8 +223,11 @@ object SparkCreateBaselineDataFrame { .map(a => PubMedToOaf.convert(a, vocabularies)) .as[Oaf] .filter(p => p != null), - targetPath + s"$outputBasePath/$MDSTORE_DATA_PATH" ) + val df = spark.read.text(s"$outputBasePath/$MDSTORE_DATA_PATH") + val mdStoreSize = df.count + writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$mdStoreSize", 
s"$outputBasePath/$MDSTORE_SIZE_PATH") } } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java index 68af3d699..0af7bb6d0 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/AbstractSparkAction.java @@ -101,6 +101,10 @@ abstract class AbstractSparkAction implements Serializable { return SparkSession.builder().config(conf).getOrCreate(); } + protected static SparkSession getSparkWithHiveSession(SparkConf conf) { + return SparkSession.builder().enableHiveSupport().config(conf).getOrCreate(); + } + protected static void save(Dataset dataset, String outPath, SaveMode mode) { dataset.write().option("compression", "gzip").mode(mode).json(outPath); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java index 60669106a..d5b106c81 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/DedupRecordFactory.java @@ -1,128 +1,212 @@ package eu.dnetlib.dhp.oa.dedup; -import java.lang.reflect.InvocationTargetException; import java.util.*; -import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.commons.beanutils.BeanUtils; import org.apache.commons.lang3.StringUtils; +import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.MapGroupsFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SparkSession; - -import 
com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.Lists; +import org.apache.spark.api.java.function.ReduceFunction; +import org.apache.spark.sql.*; import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.oa.merge.AuthorMerger; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.Author; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Result; import scala.Tuple2; +import scala.Tuple3; +import scala.collection.JavaConversions; public class DedupRecordFactory { + public static final class DedupRecordReduceState { + public final String dedupId; - protected static final ObjectMapper OBJECT_MAPPER = new ObjectMapper() - .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + public final ArrayList aliases = new ArrayList<>(); + + public final HashSet acceptanceDate = new HashSet<>(); + + public OafEntity entity; + + public DedupRecordReduceState(String dedupId, String id, OafEntity entity) { + this.dedupId = dedupId; + this.entity = entity; + if (entity == null) { + aliases.add(id); + } else { + if (Result.class.isAssignableFrom(entity.getClass())) { + Result result = (Result) entity; + if (result.getDateofacceptance() != null + && StringUtils.isNotBlank(result.getDateofacceptance().getValue())) { + acceptanceDate.add(result.getDateofacceptance().getValue()); + } + } + } + } + + public String getDedupId() { + return dedupId; + } + } + + private static final int MAX_ACCEPTANCE_DATE = 20; private DedupRecordFactory() { } - public static Dataset createDedupRecord( + public static Dataset createDedupRecord( final SparkSession spark, final DataInfo dataInfo, final String mergeRelsInputPath, final String entitiesInputPath, - final Class clazz) { + final Class clazz) { - long ts = System.currentTimeMillis(); + final 
long ts = System.currentTimeMillis(); + final Encoder beanEncoder = Encoders.bean(clazz); + final Encoder kryoEncoder = Encoders.kryo(clazz); // - Dataset> entities = spark + Dataset entities = spark .read() - .textFile(entitiesInputPath) + .schema(Encoders.bean(clazz).schema()) + .json(entitiesInputPath) + .as(beanEncoder) .map( - (MapFunction>) it -> { - T entity = OBJECT_MAPPER.readValue(it, clazz); + (MapFunction>) entity -> { return new Tuple2<>(entity.getId(), entity); }, - Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))); + Encoders.tuple(Encoders.STRING(), kryoEncoder)) + .selectExpr("_1 AS id", "_2 AS kryoObject"); // : source is the dedup_id, target is the id of the mergedIn - Dataset> mergeRels = spark + Dataset mergeRels = spark .read() .load(mergeRelsInputPath) - .as(Encoders.bean(Relation.class)) .where("relClass == 'merges'") - .map( - (MapFunction>) r -> new Tuple2<>(r.getSource(), r.getTarget()), - Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + .selectExpr("source as dedupId", "target as id"); return mergeRels - .joinWith(entities, mergeRels.col("_2").equalTo(entities.col("_1")), "inner") + .join(entities, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "left") + .select("dedupId", "id", "kryoObject") + .as(Encoders.tuple(Encoders.STRING(), Encoders.STRING(), kryoEncoder)) .map( - (MapFunction, Tuple2>, Tuple2>) value -> new Tuple2<>( - value._1()._1(), value._2()._2()), - Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) + (MapFunction, DedupRecordReduceState>) t -> new DedupRecordReduceState( + t._1(), t._2(), t._3()), + Encoders.kryo(DedupRecordReduceState.class)) .groupByKey( - (MapFunction, String>) Tuple2::_1, Encoders.STRING()) - .mapGroups( - (MapGroupsFunction, T>) (key, - values) -> entityMerger(key, values, ts, dataInfo, clazz), - Encoders.bean(clazz)); + (MapFunction) DedupRecordReduceState::getDedupId, Encoders.STRING()) + .reduceGroups( + (ReduceFunction) (t1, t2) -> { + if (t1.entity == null) 
{ + t2.aliases.addAll(t1.aliases); + return t2; + } + if (t1.acceptanceDate.size() < MAX_ACCEPTANCE_DATE) { + t1.acceptanceDate.addAll(t2.acceptanceDate); + } + t1.aliases.addAll(t2.aliases); + t1.entity = reduceEntity(t1.entity, t2.entity); + + return t1; + }) + .flatMap((FlatMapFunction, OafEntity>) t -> { + String dedupId = t._1(); + DedupRecordReduceState agg = t._2(); + + if (agg.acceptanceDate.size() >= MAX_ACCEPTANCE_DATE) { + return Collections.emptyIterator(); + } + + return Stream + .concat( + Stream + .of(agg.getDedupId()) + .map(id -> createDedupOafEntity(id, agg.entity, dataInfo, ts)), + agg.aliases + .stream() + .map(id -> createMergedDedupAliasOafEntity(id, agg.entity, dataInfo, ts))) + .iterator(); + }, beanEncoder); } - public static T entityMerger( - String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) - throws IllegalAccessException, InstantiationException, InvocationTargetException { + private static OafEntity createDedupOafEntity(String id, OafEntity base, DataInfo dataInfo, long ts) { + try { + OafEntity res = (OafEntity) BeanUtils.cloneBean(base); + res.setId(id); + res.setDataInfo(dataInfo); + res.setLastupdatetimestamp(ts); + return res; + } catch (Exception e) { + throw new RuntimeException(e); + } + } - final Comparator> idComparator = new IdentifierComparator<>(); + private static OafEntity createMergedDedupAliasOafEntity(String id, OafEntity base, DataInfo dataInfo, long ts) { + try { + OafEntity res = createDedupOafEntity(id, base, dataInfo, ts); + DataInfo ds = (DataInfo) BeanUtils.cloneBean(dataInfo); + ds.setDeletedbyinference(true); + res.setDataInfo(ds); + return res; + } catch (Exception e) { + throw new RuntimeException(e); + } + } - final LinkedList entityList = Lists - .newArrayList(entities) - .stream() - .map(t -> Identifier.newInstance(t._2())) - .sorted(idComparator) - .map(Identifier::getEntity) - .collect(Collectors.toCollection(LinkedList::new)); + private static OafEntity reduceEntity(OafEntity 
entity, OafEntity duplicate) { - final T entity = clazz.newInstance(); - final T first = entityList.removeFirst(); - - BeanUtils.copyProperties(entity, first); - - final List> authors = Lists.newArrayList(); - - entityList - .forEach( - duplicate -> { - entity.mergeFrom(duplicate); - if (ModelSupport.isSubClass(duplicate, Result.class)) { - Result r1 = (Result) duplicate; - Optional - .ofNullable(r1.getAuthor()) - .ifPresent(a -> authors.add(a)); - } - }); - - // set authors and date - if (ModelSupport.isSubClass(entity, Result.class)) { - Optional - .ofNullable(((Result) entity).getAuthor()) - .ifPresent(a -> authors.add(a)); - - ((Result) entity).setAuthor(AuthorMerger.merge(authors)); + if (duplicate == null) { + return entity; } - entity.setId(id); + int compare = new IdentifierComparator<>() + .compare(Identifier.newInstance(entity), Identifier.newInstance(duplicate)); - entity.setLastupdatetimestamp(ts); - entity.setDataInfo(dataInfo); + if (compare > 0) { + OafEntity swap = duplicate; + duplicate = entity; + entity = swap; + } + + entity.mergeFrom(duplicate); + + if (ModelSupport.isSubClass(duplicate, Result.class)) { + Result re = (Result) entity; + Result rd = (Result) duplicate; + + List> authors = new ArrayList<>(); + if (re.getAuthor() != null) { + authors.add(re.getAuthor()); + } + if (rd.getAuthor() != null) { + authors.add(rd.getAuthor()); + } + + re.setAuthor(AuthorMerger.merge(authors)); + } return entity; } + public static T entityMerger( + String id, Iterator> entities, long ts, DataInfo dataInfo, Class clazz) { + T base = entities.next()._2(); + + while (entities.hasNext()) { + T duplicate = entities.next()._2(); + if (duplicate != null) + base = (T) reduceEntity(base, duplicate); + } + + base.setId(id); + base.setDataInfo(dataInfo); + base.setLastupdatetimestamp(ts); + + return base; + } + } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java 
b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java index 7e0d66062..1d3d4afdd 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/IdGenerator.java @@ -1,6 +1,7 @@ package eu.dnetlib.dhp.oa.dedup; +import static eu.dnetlib.dhp.utils.DHPUtils.md5; import static org.apache.commons.lang3.StringUtils.substringAfter; import static org.apache.commons.lang3.StringUtils.substringBefore; @@ -14,33 +15,36 @@ import eu.dnetlib.dhp.schema.oaf.utils.PidType; public class IdGenerator implements Serializable { // pick the best pid from the list (consider date and pidtype) - public static String generate(List> pids, String defaultID) { + public static String generate(List pids, String defaultID) { if (pids == null || pids.isEmpty()) return defaultID; return generateId(pids); } - private static String generateId(List> pids) { - Identifier bp = pids + private static String generateId(List pids) { + Identifier bp = pids .stream() .min(Identifier::compareTo) .orElseThrow(() -> new IllegalStateException("unable to generate id")); - String prefix = substringBefore(bp.getOriginalID(), "|"); - String ns = substringBefore(substringAfter(bp.getOriginalID(), "|"), "::"); - String suffix = substringAfter(bp.getOriginalID(), "::"); + return generate(bp.getOriginalID()); + } + + public static String generate(String originalId) { + String prefix = substringBefore(originalId, "|"); + String ns = substringBefore(substringAfter(originalId, "|"), "::"); + String suffix = substringAfter(originalId, "::"); final String pidType = substringBefore(ns, "_"); if (PidType.isValid(pidType)) { return prefix + "|" + dedupify(ns) + "::" + suffix; } else { - return prefix + "|dedup_wf_001::" + suffix; + return prefix + "|dedup_wf_002::" + md5(originalId); // hash the whole originalId to avoid collisions } } private static String dedupify(String ns) { 
- StringBuilder prefix; if (PidType.valueOf(substringBefore(ns, "_")) == PidType.openorgs) { prefix = new StringBuilder(substringBefore(ns, "_")); @@ -53,5 +57,4 @@ public class IdGenerator implements Serializable { } return prefix.substring(0, 12); } - } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java index babbaaabd..59626c141 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java @@ -3,49 +3,47 @@ package eu.dnetlib.dhp.oa.dedup; import static eu.dnetlib.dhp.schema.common.ModelConstants.DNET_PROVENANCE_ACTIONS; import static eu.dnetlib.dhp.schema.common.ModelConstants.PROVENANCE_DEDUP; +import static org.apache.spark.sql.functions.*; import java.io.IOException; -import java.util.*; -import java.util.stream.Collectors; +import java.time.LocalDate; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Optional; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FlatMapFunction; -import org.apache.spark.api.java.function.MapFunction; -import org.apache.spark.api.java.function.MapGroupsFunction; -import org.apache.spark.graphx.Edge; -import org.apache.spark.rdd.RDD; +import org.apache.spark.sql.*; import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.catalyst.encoders.RowEncoder; +import org.apache.spark.sql.expressions.UserDefinedFunction; +import 
org.apache.spark.sql.expressions.Window; +import org.apache.spark.sql.expressions.WindowSpec; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructType; import org.dom4j.DocumentException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.SAXException; -import com.google.common.collect.Lists; import com.google.common.hash.Hashing; +import com.kwartile.lib.cc.ConnectedComponent; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.oa.dedup.graph.ConnectedComponent; -import eu.dnetlib.dhp.oa.dedup.graph.GraphProcessor; -import eu.dnetlib.dhp.oa.dedup.model.Identifier; import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.DataInfo; -import eu.dnetlib.dhp.schema.oaf.OafEntity; -import eu.dnetlib.dhp.schema.oaf.Qualifier; -import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.pace.config.DedupConfig; -import eu.dnetlib.pace.util.MapDocumentUtil; -import scala.Tuple2; +import scala.Tuple3; +import scala.collection.JavaConversions; public class SparkCreateMergeRels extends AbstractSparkAction { @@ -68,10 +66,12 @@ public class SparkCreateMergeRels extends AbstractSparkAction { log.info("isLookupUrl {}", isLookUpUrl); SparkConf conf = new SparkConf(); + conf.set("hive.metastore.uris", parser.get("hiveMetastoreUris")); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); + conf.registerKryoClasses(ModelSupport.getOafModelClasses()); - new SparkCreateMergeRels(parser, getSparkSession(conf)) + new SparkCreateMergeRels(parser, getSparkWithHiveSession(conf)) 
.run(ISLookupClientFactory.getLookUpService(isLookUpUrl)); } @@ -87,14 +87,15 @@ public class SparkCreateMergeRels extends AbstractSparkAction { .ofNullable(parser.get("cutConnectedComponent")) .map(Integer::valueOf) .orElse(0); + + final String pivotHistoryDatabase = parser.get("pivotHistoryDatabase"); + log.info("connected component cut: '{}'", cut); log.info("graphBasePath: '{}'", graphBasePath); log.info("isLookUpUrl: '{}'", isLookUpUrl); log.info("actionSetId: '{}'", actionSetId); log.info("workingPath: '{}'", workingPath); - final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); - for (DedupConfig dedupConf : getConfigurations(isLookUpService, actionSetId)) { final String subEntity = dedupConf.getWf().getSubEntityValue(); final Class clazz = ModelSupport.entityTypes.get(EntityType.valueOf(subEntity)); @@ -106,113 +107,173 @@ public class SparkCreateMergeRels extends AbstractSparkAction { final String mergeRelPath = DedupUtility.createMergeRelPath(workingPath, actionSetId, subEntity); - // - JavaPairRDD vertexes = createVertexes(sc, graphBasePath, subEntity, dedupConf); - - final RDD> edgeRdd = spark + final Dataset simRels = spark .read() .load(DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity)) - .as(Encoders.bean(Relation.class)) - .javaRDD() - .map(it -> new Edge<>(hash(it.getSource()), hash(it.getTarget()), it.getRelClass())) - .rdd(); + .select("source", "target"); - Dataset> rawMergeRels = spark - .createDataset( - GraphProcessor - .findCCs(vertexes.rdd(), edgeRdd, maxIterations, cut) - .toJavaRDD() - .filter(k -> k.getIds().size() > 1) - .flatMap(this::ccToRels) - .rdd(), - Encoders.tuple(Encoders.STRING(), Encoders.STRING())); + UserDefinedFunction hashUDF = functions + .udf( + (String s) -> hash(s), DataTypes.LongType); - Dataset> entities = spark + // + Dataset vertexIdMap = simRels + .selectExpr("source as id") + .union(simRels.selectExpr("target as id")) + .distinct() + .withColumn("vertexId", 
hashUDF.apply(functions.col("id"))); + + // transform simrels into pairs of numeric ids + final Dataset edges = spark .read() - .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) - .map( - (MapFunction>) it -> { - OafEntity entity = OBJECT_MAPPER.readValue(it, clazz); - return new Tuple2<>(entity.getId(), entity); - }, - Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))); + .load(DedupUtility.createSimRelPath(workingPath, actionSetId, subEntity)) + .select("source", "target") + .withColumn("source", hashUDF.apply(functions.col("source"))) + .withColumn("target", hashUDF.apply(functions.col("target"))); - Dataset mergeRels = rawMergeRels - .joinWith(entities, rawMergeRels.col("_2").equalTo(entities.col("_1")), "inner") - // , - .map( - (MapFunction, Tuple2>, Tuple2>) value -> new Tuple2<>( - value._1()._1(), value._2()._2()), - Encoders.tuple(Encoders.STRING(), Encoders.kryo(clazz))) - // - .groupByKey( - (MapFunction, String>) Tuple2::_1, Encoders.STRING()) - .mapGroups( - (MapGroupsFunction, ConnectedComponent>) this::generateID, - Encoders.bean(ConnectedComponent.class)) - // + // resolve connected components + // ("vertexId", "groupId") + Dataset cliques = ConnectedComponent + .runOnPairs(edges, 50, spark); + + // transform "vertexId" back to its original string value + // groupId is kept numeric as its string value is not used + // ("id", "groupId") + Dataset rawMergeRels = cliques + .join(vertexIdMap, JavaConversions.asScalaBuffer(Collections.singletonList("vertexId")), "inner") + .drop("vertexId") + .distinct(); + + // empty dataframe if historydatabase is not used + Dataset pivotHistory = spark + .createDataset( + Collections.emptyList(), + RowEncoder + .apply(StructType.fromDDL("id STRING, lastUsage STRING"))); + + if (StringUtils.isNotBlank(pivotHistoryDatabase)) { + pivotHistory = spark + .read() + .table(pivotHistoryDatabase + "." 
+ subEntity) + .selectExpr("id", "lastUsage"); + } + + // depending on resulttype collectefrom and dateofacceptance are evaluated differently + String collectedfromExpr = "false AS collectedfrom"; + String dateExpr = "'' AS date"; + + if (Result.class.isAssignableFrom(clazz)) { + if (Publication.class.isAssignableFrom(clazz)) { + collectedfromExpr = "array_contains(collectedfrom.key, '" + ModelConstants.CROSSREF_ID + + "') AS collectedfrom"; + } else if (eu.dnetlib.dhp.schema.oaf.Dataset.class.isAssignableFrom(clazz)) { + collectedfromExpr = "array_contains(collectedfrom.key, '" + ModelConstants.DATACITE_ID + + "') AS collectedfrom"; + } + + dateExpr = "dateofacceptance.value AS date"; + } + + // cap pidType at w3id as from there on they are considered equal + UserDefinedFunction mapPid = udf( + (String s) -> Math.min(PidType.tryValueOf(s).ordinal(), PidType.w3id.ordinal()), DataTypes.IntegerType); + + UserDefinedFunction validDate = udf((String date) -> { + if (StringUtils.isNotBlank(date) + && date.matches(DatePicker.DATE_PATTERN) && DatePicker.inRange(date)) { + return date; + } + return LocalDate.now().plusWeeks(1).toString(); + }, DataTypes.StringType); + + Dataset pivotingData = spark + .read() + .schema(Encoders.bean(clazz).schema()) + .json(DedupUtility.createEntityPath(graphBasePath, subEntity)) + .selectExpr( + "id", + "regexp_extract(id, '^\\\\d+\\\\|([^_]+).*::', 1) AS pidType", + collectedfromExpr, + dateExpr) + .withColumn("pidType", mapPid.apply(col("pidType"))) // ordinal of pid type + .withColumn("date", validDate.apply(col("date"))); + + // ordering to selected pivot id + WindowSpec w = Window + .partitionBy("groupId") + .orderBy( + col("lastUsage").desc_nulls_last(), + col("pidType").asc_nulls_last(), + col("collectedfrom").desc_nulls_last(), + col("date").asc_nulls_last(), + col("id").asc_nulls_last()); + + Dataset output = rawMergeRels + .join(pivotHistory, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "full") + 
.join(pivotingData, JavaConversions.asScalaBuffer(Collections.singletonList("id")), "left") + .withColumn("pivot", functions.first("id").over(w)) + .withColumn("position", functions.row_number().over(w)) .flatMap( - (FlatMapFunction) cc -> ccToMergeRel(cc, dedupConf), - Encoders.bean(Relation.class)); + (FlatMapFunction>) (Row r) -> { + String id = r.getAs("id"); + String dedupId = IdGenerator.generate(id); - saveParquet(mergeRels, mergeRelPath, SaveMode.Overwrite); + String pivot = r.getAs("pivot"); + String pivotDedupId = IdGenerator.generate(pivot); + // filter out id == pivotDedupId + // those are caused by claim expressed on pivotDedupId + // information will be merged after creating deduprecord + if (id.equals(pivotDedupId)) { + return Collections.emptyIterator(); + } + + ArrayList> res = new ArrayList<>(); + + // singleton pivots have null groupId as they do not match rawMergeRels + if (r.isNullAt(r.fieldIndex("groupId"))) { + // the record is existing if it matches pivotingData + if (!r.isNullAt(r.fieldIndex("collectedfrom"))) { + // create relation with old dedup id + res.add(new Tuple3<>(id, dedupId, null)); + } + return res.iterator(); + } + + // this was a pivot in a previous graph but it has been merged into a new group with different + // pivot + if (!r.isNullAt(r.fieldIndex("lastUsage")) && !pivot.equals(id) + && !dedupId.equals(pivotDedupId)) { + // materialize the previous dedup record as a merge relation with the new one + res.add(new Tuple3<>(dedupId, pivotDedupId, null)); + } + + // add merge relations + if (cut <= 0 || r. 
getAs("position") <= cut) { + res.add(new Tuple3<>(id, pivotDedupId, pivot)); + } + + return res.iterator(); + }, Encoders.tuple(Encoders.STRING(), Encoders.STRING(), Encoders.STRING())) + .distinct() + .flatMap( + (FlatMapFunction, Relation>) (Tuple3 r) -> { + String id = r._1(); + String dedupId = r._2(); + String pivot = r._3(); + + ArrayList res = new ArrayList<>(); + res.add(rel(pivot, dedupId, id, ModelConstants.MERGES, dedupConf)); + res.add(rel(pivot, id, dedupId, ModelConstants.IS_MERGED_IN, dedupConf)); + + return res.iterator(); + }, Encoders.bean(Relation.class)); + + saveParquet(output, mergeRelPath, SaveMode.Overwrite); } } - private ConnectedComponent generateID(String key, Iterator> values) { - - List> identifiers = Lists - .newArrayList(values) - .stream() - .map(v -> Identifier.newInstance(v._2())) - .collect(Collectors.toList()); - - String rootID = IdGenerator.generate(identifiers, key); - - if (Objects.equals(rootID, key)) - throw new IllegalStateException("generated default ID: " + rootID); - - return new ConnectedComponent(rootID, - identifiers.stream().map(i -> i.getEntity().getId()).collect(Collectors.toSet())); - } - - private JavaPairRDD createVertexes(JavaSparkContext sc, String graphBasePath, String subEntity, - DedupConfig dedupConf) { - - return sc - .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) - .mapToPair(json -> { - String id = MapDocumentUtil.getJPathString(dedupConf.getWf().getIdPath(), json); - return new Tuple2<>(hash(id), id); - }); - } - - private Iterator> ccToRels(ConnectedComponent cc) { - return cc - .getIds() - .stream() - .map(id -> new Tuple2<>(cc.getCcId(), id)) - .iterator(); - } - - private Iterator ccToMergeRel(ConnectedComponent cc, DedupConfig dedupConf) { - return cc - .getIds() - .stream() - .flatMap( - id -> { - List tmp = new ArrayList<>(); - - tmp.add(rel(cc.getCcId(), id, ModelConstants.MERGES, dedupConf)); - tmp.add(rel(id, cc.getCcId(), ModelConstants.IS_MERGED_IN, dedupConf)); - - 
return tmp.stream(); - }) - .iterator(); - } - - private Relation rel(String source, String target, String relClass, DedupConfig dedupConf) { + private static Relation rel(String pivot, String source, String target, String relClass, DedupConfig dedupConf) { String entityType = dedupConf.getWf().getEntityType(); @@ -238,6 +299,14 @@ public class SparkCreateMergeRels extends AbstractSparkAction { // TODO calculate the trust value based on the similarity score of the elements in the CC r.setDataInfo(info); + + if (pivot != null) { + KeyValue pivotKV = new KeyValue(); + pivotKV.setKey("pivot"); + pivotKV.setValue(pivot); + + r.setProperties(Arrays.asList(pivotKV)); + } return r; } diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java index 65ad0c327..60752a457 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkWhitelistSimRels.java @@ -91,18 +91,12 @@ public class SparkWhitelistSimRels extends AbstractSparkAction { Dataset entities = spark .read() .textFile(DedupUtility.createEntityPath(graphBasePath, subEntity)) - .repartition(numPartitions) - .withColumn("id", functions.get_json_object(new Column("value"), dedupConf.getWf().getIdPath())); + .select(functions.get_json_object(new Column("value"), dedupConf.getWf().getIdPath()).as("id")) + .distinct(); - Dataset whiteListRels1 = whiteListRels - .join(entities, entities.col("id").equalTo(whiteListRels.col("from")), "inner") - .select("from", "to"); - - Dataset whiteListRels2 = whiteListRels1 - .join(entities, whiteListRels1.col("to").equalTo(entities.col("id")), "inner") - .select("from", "to"); - - Dataset whiteListSimRels = whiteListRels2 + Dataset whiteListSimRels = whiteListRels + .join(entities, 
entities.col("id").equalTo(whiteListRels.col("from")), "leftsemi") + .join(entities, functions.col("to").equalTo(entities.col("id")), "leftsemi") .map( (MapFunction) r -> DedupUtility .createSimRel(r.getString(0), r.getString(1), entity), diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java deleted file mode 100644 index 4a39a175d..000000000 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/ConnectedComponent.java +++ /dev/null @@ -1,100 +0,0 @@ - -package eu.dnetlib.dhp.oa.dedup.graph; - -import java.io.IOException; -import java.io.Serializable; -import java.util.Set; -import java.util.stream.Collectors; - -import org.apache.commons.lang3.StringUtils; -import org.codehaus.jackson.annotate.JsonIgnore; - -import com.fasterxml.jackson.databind.ObjectMapper; - -import eu.dnetlib.dhp.utils.DHPUtils; -import eu.dnetlib.pace.util.PaceException; - -public class ConnectedComponent implements Serializable { - - private String ccId; - private Set ids; - - private static final String CONNECTED_COMPONENT_ID_PREFIX = "connect_comp"; - - public ConnectedComponent(Set ids, final int cut) { - this.ids = ids; - - this.ccId = createDefaultID(); - - if (cut > 0 && ids.size() > cut) { - this.ids = ids - .stream() - .filter(id -> !ccId.equalsIgnoreCase(id)) - .limit(cut - 1) - .collect(Collectors.toSet()); -// this.ids.add(ccId); ?? 
- } - } - - public ConnectedComponent(String ccId, Set ids) { - this.ccId = ccId; - this.ids = ids; - } - - public String createDefaultID() { - if (ids.size() > 1) { - final String s = getMin(); - String prefix = s.split("\\|")[0]; - ccId = prefix + "|" + CONNECTED_COMPONENT_ID_PREFIX + "::" + DHPUtils.md5(s); - return ccId; - } else { - return ids.iterator().next(); - } - } - - @JsonIgnore - public String getMin() { - - final StringBuilder min = new StringBuilder(); - - ids - .forEach( - id -> { - if (StringUtils.isBlank(min.toString())) { - min.append(id); - } else { - if (min.toString().compareTo(id) > 0) { - min.setLength(0); - min.append(id); - } - } - }); - return min.toString(); - } - - @Override - public String toString() { - ObjectMapper mapper = new ObjectMapper(); - try { - return mapper.writeValueAsString(this); - } catch (IOException e) { - throw new PaceException("Failed to create Json: ", e); - } - } - - public Set getIds() { - return ids; - } - - public void setIds(Set ids) { - this.ids = ids; - } - - public String getCcId() { - return ccId; - } - - public void setCcId(String ccId) { - this.ccId = ccId; - } -} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/GraphProcessor.scala b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/GraphProcessor.scala deleted file mode 100644 index f4dd85d75..000000000 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/graph/GraphProcessor.scala +++ /dev/null @@ -1,37 +0,0 @@ -package eu.dnetlib.dhp.oa.dedup.graph - -import org.apache.spark.graphx._ -import org.apache.spark.rdd.RDD - -import scala.collection.JavaConversions; - -object GraphProcessor { - - def findCCs(vertexes: RDD[(VertexId, String)], edges: RDD[Edge[String]], maxIterations: Int, cut:Int): RDD[ConnectedComponent] = { - val graph: Graph[String, String] = Graph(vertexes, edges).partitionBy(PartitionStrategy.RandomVertexCut) //TODO remember to remove 
partitionby - val cc = graph.connectedComponents(maxIterations).vertices - - val joinResult = vertexes.leftOuterJoin(cc).map { - case (id, (openaireId, cc)) => { - if (cc.isEmpty) { - (id, openaireId) - } - else { - (cc.get, openaireId) - } - } - } - val connectedComponents = joinResult.groupByKey() - .map[ConnectedComponent](cc => asConnectedComponent(cc, cut)) - connectedComponents - } - - - - def asConnectedComponent(group: (VertexId, Iterable[String]), cut:Int): ConnectedComponent = { - val docs = group._2.toSet[String] - val connectedComponent = new ConnectedComponent(JavaConversions.setAsJavaSet[String](docs), cut); - connectedComponent - } - -} \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java index 0cba4fc3b..e03c3bf95 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/model/Identifier.java @@ -3,21 +3,21 @@ package eu.dnetlib.dhp.oa.dedup.model; import java.io.Serializable; import java.text.SimpleDateFormat; -import java.util.*; -import java.util.stream.Collectors; +import java.time.LocalDate; +import java.util.Date; +import java.util.List; +import java.util.Objects; import org.apache.commons.lang3.StringUtils; -import com.google.common.collect.Sets; - import eu.dnetlib.dhp.oa.dedup.DatePicker; import eu.dnetlib.dhp.oa.dedup.IdentifierComparator; import eu.dnetlib.dhp.schema.common.EntityType; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.*; -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; -import eu.dnetlib.dhp.schema.oaf.utils.PidComparator; +import eu.dnetlib.dhp.schema.oaf.Field; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import 
eu.dnetlib.dhp.schema.oaf.OafEntity; +import eu.dnetlib.dhp.schema.oaf.Result; import eu.dnetlib.dhp.schema.oaf.utils.PidType; public class Identifier implements Serializable, Comparable> { @@ -50,7 +50,7 @@ public class Identifier implements Serializable, Comparable if (Objects.nonNull(date)) { return date; } else { - String sDate = BASE_DATE; + String sDate = LocalDate.now().plusDays(1).toString(); if (ModelSupport.isSubClass(getEntity(), Result.class)) { Result result = (Result) getEntity(); if (isWellformed(result.getDateofacceptance())) { diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json index b1df08535..4f9f4b0b5 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json @@ -28,5 +28,17 @@ "paramLongName": "workingPath", "paramDescription": "path for the working directory", "paramRequired": true + }, + { + "paramName":"h", + "paramLongName":"hiveMetastoreUris", + "paramDescription": "the hive metastore uris", + "paramRequired": true + }, + { + "paramName": "p", + "paramLongName": "pivotHistoryDatabase", + "paramDescription": "Pivot history database", + "paramRequired": false } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/config-default.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/config-default.xml new file mode 100644 index 000000000..17bb70647 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/config-default.xml @@ -0,0 +1,26 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + 
oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + sparkSqlWarehouseDir + /user/hive/warehouse + + \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/sql.sql b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/sql.sql new file mode 100644 index 000000000..86dbda1c9 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/sql.sql @@ -0,0 +1,62 @@ + +CREATE TABLE `${pivot_history_db}`.`dataset_new` STORED AS PARQUET AS +WITH pivots ( + SELECT property.value AS id, '${new_graph_date}' AS usedIn FROM `${new_graph_db}`.`relation` + LEFT SEMI JOIN `${new_graph_db}`.`dataset` ON relation.source = dataset.id + LATERAL VIEW EXPLODE(properties) AS property WHERE relClass = 'isMergedIn' AND property.key = 'pivot' +UNION + SELECT id, usedIn FROM `${pivot_history_db}`.`dataset` LATERAL VIEW EXPLODE(usages) AS usedIn +) +SELECT id, min(usedIn) as firstUsage, max(usedIn) as lastUsage, collect_set(usedIn) as usages + FROM pivots + GROUP BY id; /*EOS*/ +CREATE TABLE `${pivot_history_db}`.`publication_new` STORED AS PARQUET AS +WITH pivots ( + SELECT property.value AS id, '${new_graph_date}' AS usedIn FROM `${new_graph_db}`.`relation` + LEFT SEMI JOIN `${new_graph_db}`.`publication` ON relation.source = publication.id + LATERAL VIEW EXPLODE(properties) AS property WHERE relClass = 'isMergedIn' AND property.key = 'pivot' +UNION + SELECT id, usedIn FROM `${pivot_history_db}`.`publication` LATERAL VIEW EXPLODE(usages) AS usedIn +) +SELECT id, min(usedIn) as firstUsage, max(usedIn) as lastUsage, collect_set(usedIn) as usages + FROM pivots + GROUP BY id; /*EOS*/ +CREATE TABLE `${pivot_history_db}`.`software_new` STORED AS PARQUET AS +WITH pivots ( + SELECT property.value AS id, 
'${new_graph_date}' AS usedIn FROM `${new_graph_db}`.`relation` + LEFT SEMI JOIN `${new_graph_db}`.`software` ON relation.source = software.id + LATERAL VIEW EXPLODE(properties) AS property WHERE relClass = 'isMergedIn' AND property.key = 'pivot' +UNION + SELECT id, usedIn FROM `${pivot_history_db}`.`software` LATERAL VIEW EXPLODE(usages) AS usedIn +) +SELECT id, min(usedIn) as firstUsage, max(usedIn) as lastUsage, collect_set(usedIn) as usages + FROM pivots + GROUP BY id; /*EOS*/ +CREATE TABLE `${pivot_history_db}`.`otherresearchproduct_new` STORED AS PARQUET AS +WITH pivots ( + SELECT property.value AS id, '${new_graph_date}' AS usedIn FROM `${new_graph_db}`.`relation` + LEFT SEMI JOIN `${new_graph_db}`.`otherresearchproduct` ON relation.source = otherresearchproduct.id + LATERAL VIEW EXPLODE(properties) AS property WHERE relClass = 'isMergedIn' AND property.key = 'pivot' +UNION + SELECT id, usedIn FROM `${pivot_history_db}`.`otherresearchproduct` LATERAL VIEW EXPLODE(usages) AS usedIn +) +SELECT id, min(usedIn) as firstUsage, max(usedIn) as lastUsage, collect_set(usedIn) as usages + FROM pivots + GROUP BY id; /*EOS*/ + + +DROP TABLE IF EXISTS `${pivot_history_db}`.`dataset_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`dataset` RENAME TO `${pivot_history_db}`.`dataset_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`dataset_new` RENAME TO `${pivot_history_db}`.`dataset`; /*EOS*/ + +DROP TABLE IF EXISTS `${pivot_history_db}`.`publication_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`publication` RENAME TO `${pivot_history_db}`.`publication_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`publication_new` RENAME TO `${pivot_history_db}`.`publication`; /*EOS*/ + +DROP TABLE IF EXISTS `${pivot_history_db}`.`software_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`software` RENAME TO `${pivot_history_db}`.`software_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`software_new` RENAME TO `${pivot_history_db}`.`software`; /*EOS*/ + +DROP TABLE IF EXISTS 
`${pivot_history_db}`.`otherresearchproduct_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`otherresearchproduct` RENAME TO `${pivot_history_db}`.`otherresearchproduct_old`; /*EOS*/ +ALTER TABLE `${pivot_history_db}`.`otherresearchproduct_new` RENAME TO `${pivot_history_db}`.`otherresearchproduct`; /*EOS*/ diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/workflow.xml new file mode 100644 index 000000000..d562f088e --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/workflow.xml @@ -0,0 +1,95 @@ + + + + + pivot_history_db + + Pivot history DB on hive + + + new_graph_db + + New graph DB on hive + + + new_graph_date + + Creation date of new graph db + + + + + hiveMetastoreUris + hive server metastore URIs + + + sparkSqlWarehouseDir + + + + sparkClusterOpts + --conf spark.network.timeout=600 --conf spark.extraListeners= --conf spark.sql.queryExecutionListeners= --conf spark.yarn.historyServer.address=http://iis-cdh5-test-m3.ocean.icm.edu.pl:18088 --conf spark.eventLog.dir=hdfs://nameservice1/user/spark/applicationHistory + spark cluster-wide options + + + sparkResourceOpts + --executor-memory=3G --conf spark.executor.memoryOverhead=3G --executor-cores=6 --driver-memory=8G --driver-cores=4 + spark resource options + + + sparkApplicationOpts + --conf spark.sql.shuffle.partitions=3840 + spark resource options + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn + cluster + Upgrade Pivot History + eu.dnetlib.dhp.oozie.RunSQLSparkJob + 
dhp-dedup-openaire-${projectVersion}.jar + + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + ${sparkClusterOpts} + ${sparkResourceOpts} + ${sparkApplicationOpts} + + --hiveMetastoreUris${hiveMetastoreUris} + --sqleu/dnetlib/dhp/oa/dedup/pivothistory/oozie_app/sql.sql + --pivot_history_db${pivot_history_db} + --new_graph_db${new_graph_db} + --new_graph_date${new_graph_date} + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/config-default.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/config-default.xml index 2e0ed9aee..cd29965e3 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/config-default.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/config-default.xml @@ -15,4 +15,8 @@ oozie.action.sharelib.for.spark spark2 + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml index ba2270c8a..49a331def 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-dedup-openaire/src/main/resources/eu/dnetlib/dhp/oa/dedup/scan/oozie_app/workflow.xml @@ -188,6 +188,8 @@ --isLookUpUrl${isLookUpUrl} --actionSetId${actionSetId} --cutConnectedComponent${cutConnectedComponent} + --hiveMetastoreUris${hiveMetastoreUris} + --pivotHistoryDatabase${pivotHistoryDatabase} diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/scala/com/kwartile/lib/cc/ConnectedComponent.scala b/dhp-workflows/dhp-dedup-openaire/src/main/scala/com/kwartile/lib/cc/ConnectedComponent.scala new file mode 100644 
index 000000000..4c3362235 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/main/scala/com/kwartile/lib/cc/ConnectedComponent.scala @@ -0,0 +1,335 @@ +/** Copyright (c) 2017 Kwartile, Inc., http://www.kwartile.com + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** Map-reduce implementation of Connected Component + * Given lists of subgraphs, returns all the nodes that are connected. 
+ */ + +package com.kwartile.lib.cc + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{Dataset, Row, SparkSession} +import org.apache.spark.storage.StorageLevel + +import scala.annotation.tailrec +import scala.collection.mutable + +object ConnectedComponent extends Serializable { + + /** Applies Small Star operation on RDD of nodePairs + * + * @param nodePairs on which to apply Small Star operations + * @return new nodePairs after the operation and conncectivy change count + */ + private def smallStar(nodePairs: RDD[(Long, Long)]): (RDD[(Long, Long)], Long) = { + + /** generate RDD of (self, List(neighbors)) where self > neighbors + * E.g.: nodePairs (1, 4), (6, 1), (3, 2), (6, 5) + * will result into (4, List(1)), (6, List(1)), (3, List(2)), (6, List(5)) + */ + val neighbors = nodePairs.map(x => { + val (self, neighbor) = (x._1, x._2) + if (self > neighbor) + (self, neighbor) + else + (neighbor, self) + }) + + /** reduce on self to get list of all its neighbors. + * E.g: (4, List(1)), (6, List(1)), (3, List(2)), (6, List(5)) + * will result into (4, List(1)), (6, List(1, 5)), (3, List(2)) + * Note: + * (1) you may need to tweak number of partitions. + * (2) also, watch out for data skew. 
In that case, consider using rangePartitioner + */ + val empty = mutable.HashSet[Long]() + val allNeighbors = neighbors.aggregateByKey(empty)( + (lb, v) => lb += v, + (lb1, lb2) => lb1 ++ lb2 + ) + + /** Apply Small Star operation on (self, List(neighbor)) to get newNodePairs and count the change in connectivity + */ + + val newNodePairsWithChangeCount = allNeighbors + .map(x => { + val self = x._1 + val neighbors = x._2.toList + val minNode = argMin(self :: neighbors) + val newNodePairs = (self :: neighbors) + .map(neighbor => { + (neighbor, minNode) + }) + .filter(x => { + val neighbor = x._1 + val minNode = x._2 + (neighbor <= self && neighbor != minNode) || (self == neighbor) + }) + val uniqueNewNodePairs = newNodePairs.toSet.toList + + /** We count the change by taking a diff of the new node pairs with the old node pairs + */ + val connectivityChangeCount = (uniqueNewNodePairs diff neighbors.map((self, _))).length + (uniqueNewNodePairs, connectivityChangeCount) + }) + .persist(StorageLevel.MEMORY_AND_DISK_SER) + + /** Sum all the changeCounts + */ + val totalConnectivityCountChange = newNodePairsWithChangeCount + .mapPartitions(iter => { + val (v, l) = iter.toSeq.unzip + val sum = l.sum + Iterator(sum) + }) + .sum + .toLong + + val newNodePairs = newNodePairsWithChangeCount.map(x => x._1).flatMap(x => x) + newNodePairsWithChangeCount.unpersist(false) + (newNodePairs, totalConnectivityCountChange) + } + + /** Apply Large Star operation on a RDD of nodePairs + * + * @param nodePairs on which to apply Large Star operations + * @return new nodePairs after the operation and conncectivy change count + */ + private def largeStar(nodePairs: RDD[(Long, Long)]): (RDD[(Long, Long)], Long) = { + + /** generate RDD of (self, List(neighbors)) + * E.g.: nodePairs (1, 4), (6, 1), (3, 2), (6, 5) + * will result into (4, List(1)), (1, List(4)), (6, List(1)), (1, List(6)), (3, List(2)), (2, List(3)), (6, List(5)), (5, List(6)) + */ + + val neighbors = nodePairs.flatMap(x => { + 
val (self, neighbor) = (x._1, x._2) + if (self == neighbor) + List((self, neighbor)) + else + List((self, neighbor), (neighbor, self)) + }) + + /** reduce on self to get list of all its neighbors. + * E.g: (4, List(1)), (1, List(4)), (6, List(1)), (1, List(6)), (3, List(2)), (2, List(3)), (6, List(5)), (5, List(6)) + * will result into (4, List(1)), (1, List(4, 6)), (6, List(1, 5)), (3, List(2)), (2, List(3)), (5, List(6)) + * Note: + * (1) you may need to tweak number of partitions. + * (2) also, watch out for data skew. In that case, consider using rangePartitioner + */ + + val localAdd = (s: mutable.HashSet[Long], v: Long) => s += v + val partitionAdd = (s1: mutable.HashSet[Long], s2: mutable.HashSet[Long]) => s1 ++= s2 + val allNeighbors = + neighbors.aggregateByKey(mutable.HashSet.empty[Long] /*, rangePartitioner*/ )(localAdd, partitionAdd) + + /** Apply Large Star operation on (self, List(neighbor)) to get newNodePairs and count the change in connectivity + */ + + val newNodePairsWithChangeCount = allNeighbors + .map(x => { + val self = x._1 + val neighbors = x._2.toList + val minNode = argMin(self :: neighbors) + val newNodePairs = (self :: neighbors) + .map(neighbor => { + (neighbor, minNode) + }) + .filter(x => { + val neighbor = x._1 + val minNode = x._2 + neighbor > self || neighbor == minNode + }) + + val uniqueNewNodePairs = newNodePairs.toSet.toList + val connectivityChangeCount = (uniqueNewNodePairs diff neighbors.map((self, _))).length + (uniqueNewNodePairs, connectivityChangeCount) + }) + .persist(StorageLevel.MEMORY_AND_DISK_SER) + + val totalConnectivityCountChange = newNodePairsWithChangeCount + .mapPartitions(iter => { + val (v, l) = iter.toSeq.unzip + val sum = l.sum + Iterator(sum) + }) + .sum + .toLong + + /** Sum all the changeCounts + */ + val newNodePairs = newNodePairsWithChangeCount.map(x => x._1).flatMap(x => x) + newNodePairsWithChangeCount.unpersist(false) + (newNodePairs, totalConnectivityCountChange) + } + + private def 
argMin(nodes: List[Long]): Long = { + nodes.min(Ordering.by((node: Long) => node)) + } + + /** Build nodePairs given a list of nodes. A list of nodes represents a subgraph. + * + * @param nodes that are part of a subgraph + * @return nodePairs for a subgraph + */ + private def buildPairs(nodes: List[Long]): List[(Long, Long)] = { + buildPairs(nodes.head, nodes.tail, null.asInstanceOf[List[(Long, Long)]]) + } + + @tailrec + private def buildPairs(node: Long, neighbors: List[Long], partialPairs: List[(Long, Long)]): List[(Long, Long)] = { + if (neighbors.isEmpty) { + if (partialPairs != null) + List((node, node)) ::: partialPairs + else + List((node, node)) + } else if (neighbors.length == 1) { + val neighbor = neighbors(0) + if (node > neighbor) + if (partialPairs != null) List((node, neighbor)) ::: partialPairs else List((node, neighbor)) + else if (partialPairs != null) List((neighbor, node)) ::: partialPairs + else List((neighbor, node)) + } else { + val newPartialPairs = neighbors + .map(neighbor => { + if (node > neighbor) + List((node, neighbor)) + else + List((neighbor, node)) + }) + .flatMap(x => x) + + if (partialPairs != null) + buildPairs(neighbors.head, neighbors.tail, newPartialPairs ::: partialPairs) + else + buildPairs(neighbors.head, neighbors.tail, newPartialPairs) + } + } + + /** Implements alternatingAlgo. 
Converges when the changeCount is either 0 or does not change from the previous iteration + * + * @param nodePairs for a graph + * @param largeStarConnectivityChangeCount change count that resulted from the previous iteration + * @param smallStarConnectivityChangeCount change count that resulted from the previous iteration + * @param didConverge flag to indicate the alorigth converged + * @param currIterationCount counter to capture number of iterations + * @param maxIterationCount maximum number iterations to try before giving up + * @return RDD of nodePairs + */ + + @tailrec + private def alternatingAlgo( + nodePairs: RDD[(Long, Long)], + largeStarConnectivityChangeCount: Long, + smallStarConnectivityChangeCount: Long, + didConverge: Boolean, + currIterationCount: Int, + maxIterationCount: Int + ): (RDD[(Long, Long)], Boolean, Long) = { + + val iterationCount = currIterationCount + 1 + if (didConverge) + (nodePairs, true, currIterationCount) + else if (currIterationCount >= maxIterationCount) { + (nodePairs, false, currIterationCount) + } else { + + val (nodePairsLargeStar, currLargeStarConnectivityChangeCount) = largeStar(nodePairs) + val (nodePairsSmallStar, currSmallStarConnectivityChangeCount) = smallStar(nodePairsLargeStar) + + if ( + (currLargeStarConnectivityChangeCount == largeStarConnectivityChangeCount && + currSmallStarConnectivityChangeCount == smallStarConnectivityChangeCount) || + (currSmallStarConnectivityChangeCount == 0 && currLargeStarConnectivityChangeCount == 0) + ) { + alternatingAlgo( + nodePairsSmallStar, + currLargeStarConnectivityChangeCount, + currSmallStarConnectivityChangeCount, + true, + iterationCount, + maxIterationCount + ) + } else { + alternatingAlgo( + nodePairsSmallStar, + currLargeStarConnectivityChangeCount, + currSmallStarConnectivityChangeCount, + false, + iterationCount, + maxIterationCount + ) + } + } + } + + /** Driver function + * + * @param cliques list of nodes representing subgraphs (or cliques) + * @param 
maxIterationCount maximum number iterations to try before giving up + * @return Connected Components as nodePairs where second member of the nodePair is the minimum node in the component + */ + def run(cliques: RDD[List[Long]], maxIterationCount: Int): (RDD[(Long, Long)], Boolean, Long) = { + + val nodePairs = cliques + .map(aClique => { + buildPairs(aClique) + }) + .flatMap(x => x) + + val (cc, didConverge, iterCount) = alternatingAlgo(nodePairs, 9999999L, 9999999L, false, 0, maxIterationCount) + + if (didConverge) { + (cc, didConverge, iterCount) + } else { + (null.asInstanceOf[RDD[(Long, Long)]], didConverge, iterCount) + } + } + + def runOnPairs(nodePairs: RDD[(Long, Long)], maxIterationCount: Int): (RDD[(Long, Long)], Boolean, Long) = { + val (cc, didConverge, iterCount) = alternatingAlgo(nodePairs, 9999999L, 9999999L, false, 0, maxIterationCount) + + if (didConverge) { + (cc, didConverge, iterCount) + } else { + (null.asInstanceOf[RDD[(Long, Long)]], didConverge, iterCount) + } + } + + def runOnPairs(nodePairs: Dataset[Row], maxIterationCount: Int)(implicit spark: SparkSession): Dataset[Row] = { + import spark.implicits._ + + val (cc, didConverge, iterCount) = alternatingAlgo( + nodePairs.map(e => (e.getLong(0), e.getLong(1))).rdd, + 9999999L, + 9999999L, + false, + 0, + maxIterationCount + ) + + if (didConverge) { + cc.toDF("vertexId", "groupId") + } else { + null.asInstanceOf[Dataset[Row]] + } + } + +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index 6c4935637..8b3480e60 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -41,9 +41,13 @@ import com.google.common.collect.Sets; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import 
eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.*; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; +import eu.dnetlib.dhp.schema.sx.OafUtils; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import scala.Tuple2; @ExtendWith(MockitoExtension.class) @TestMethodOrder(MethodOrderer.OrderAnnotation.class) @@ -97,6 +101,7 @@ public class SparkDedupTest implements Serializable { final SparkConf conf = new SparkConf(); conf.set("spark.sql.shuffle.partitions", "200"); + conf.set("spark.sql.warehouse.dir", testOutputBasePath + "/spark-warehouse"); spark = SparkSession .builder() .appName(SparkDedupTest.class.getSimpleName()) @@ -186,11 +191,11 @@ public class SparkDedupTest implements Serializable { System.out.println("ds_simrel = " + ds_simrel); System.out.println("orp_simrel = " + orp_simrel); - assertEquals(1538, orgs_simrel); - assertEquals(3523, pubs_simrel); - assertEquals(168, sw_simrel); - assertEquals(221, ds_simrel); - assertEquals(3392, orp_simrel); + assertEquals(751, orgs_simrel); + assertEquals(546, pubs_simrel); + assertEquals(113, sw_simrel); + assertEquals(148, ds_simrel); + assertEquals(280, orp_simrel); } @@ -235,10 +240,10 @@ public class SparkDedupTest implements Serializable { .count(); // entities simrels supposed to be equal to the number of previous step (no rels in whitelist) - assertEquals(1538, orgs_simrel); - assertEquals(3523, pubs_simrel); - assertEquals(221, ds_simrel); - assertEquals(3392, orp_simrel); + assertEquals(751, orgs_simrel); + assertEquals(546, pubs_simrel); + assertEquals(148, ds_simrel); + assertEquals(280, orp_simrel); // System.out.println("orgs_simrel = " + orgs_simrel); // System.out.println("pubs_simrel = " + pubs_simrel); // System.out.println("ds_simrel = " + ds_simrel); @@ -268,7 +273,7 @@ public class SparkDedupTest implements Serializable { && 
rel.getTarget().equalsIgnoreCase(whiteList.get(1).split(WHITELIST_SEPARATOR)[1])) .count() > 0); - assertEquals(170, sw_simrel.count()); + assertEquals(115, sw_simrel.count()); // System.out.println("sw_simrel = " + sw_simrel.count()); } @@ -292,7 +297,9 @@ public class SparkDedupTest implements Serializable { "-w", testOutputBasePath, "-cc", - "3" + "3", + "-h", + "" }); new SparkCreateMergeRels(parser, spark).run(isLookUpService); @@ -365,6 +372,113 @@ public class SparkDedupTest implements Serializable { .deleteDirectory(new File(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel")); } + @Test + @Order(3) + void createMergeRelsWithPivotHistoryTest() throws Exception { + + ArgumentApplicationParser parser = new ArgumentApplicationParser( + classPathResourceAsString("/eu/dnetlib/dhp/oa/dedup/createCC_parameters.json")); + + spark.sql("CREATE DATABASE IF NOT EXISTS pivot_history_test"); + ModelSupport.oafTypes.keySet().forEach(entityType -> { + try { + spark + .read() + .json( + Paths + .get(SparkDedupTest.class.getResource("/eu/dnetlib/dhp/dedup/pivot_history").toURI()) + .toFile() + .getAbsolutePath()) + .write() + .mode("overwrite") + .saveAsTable("pivot_history_test." 
+ entityType); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + }); + + parser + .parseArgument( + new String[] { + "-i", + testGraphBasePath, + "-asi", + testActionSetId, + "-la", + "lookupurl", + "-w", + testOutputBasePath, + "-h", + "", + "-pivotHistoryDatabase", + "pivot_history_test" + + }); + + new SparkCreateMergeRels(parser, spark).run(isLookUpService); + + long orgs_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") + .count(); + final Dataset pubs = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/publication_mergerel") + .as(Encoders.bean(Relation.class)); + long sw_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/software_mergerel") + .count(); + long ds_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/dataset_mergerel") + .count(); + + long orp_mergerel = spark + .read() + .load(testOutputBasePath + "/" + testActionSetId + "/otherresearchproduct_mergerel") + .count(); + + final List merges = pubs + .filter("source == '50|arXiv_dedup_::c93aeb433eb90ed7a86e29be00791b7c'") + .collectAsList(); + assertEquals(3, merges.size()); + Set dups = Sets + .newHashSet( + "50|doi_________::3b1d0d8e8f930826665df9d6b82fbb73", + "50|doi_________::d5021b53204e4fdeab6ff5d5bc468032", + "50|arXiv_______::c93aeb433eb90ed7a86e29be00791b7c"); + merges.forEach(r -> { + assertEquals(ModelConstants.RESULT_RESULT, r.getRelType()); + assertEquals(ModelConstants.DEDUP, r.getSubRelType()); + assertEquals(ModelConstants.MERGES, r.getRelClass()); + assertTrue(dups.contains(r.getTarget())); + }); + + final List mergedIn = pubs + .filter("target == '50|arXiv_dedup_::c93aeb433eb90ed7a86e29be00791b7c'") + .collectAsList(); + assertEquals(3, mergedIn.size()); + mergedIn.forEach(r -> { + assertEquals(ModelConstants.RESULT_RESULT, r.getRelType()); + assertEquals(ModelConstants.DEDUP, r.getSubRelType()); + 
assertEquals(ModelConstants.IS_MERGED_IN, r.getRelClass()); + assertTrue(dups.contains(r.getSource())); + }); + + assertEquals(1268, orgs_mergerel); + assertEquals(1112, pubs.count()); + assertEquals(292, sw_mergerel); + assertEquals(476, ds_mergerel); + assertEquals(742, orp_mergerel); +// System.out.println("orgs_mergerel = " + orgs_mergerel); +// System.out.println("pubs_mergerel = " + pubs_mergerel); +// System.out.println("sw_mergerel = " + sw_mergerel); +// System.out.println("ds_mergerel = " + ds_mergerel); +// System.out.println("orp_mergerel = " + orp_mergerel); + + } + @Test @Order(4) void createMergeRelsTest() throws Exception { @@ -382,7 +496,9 @@ public class SparkDedupTest implements Serializable { "-la", "lookupurl", "-w", - testOutputBasePath + testOutputBasePath, + "-h", + "" }); new SparkCreateMergeRels(parser, spark).run(isLookUpService); @@ -437,10 +553,10 @@ public class SparkDedupTest implements Serializable { }); assertEquals(1268, orgs_mergerel); - assertEquals(1450, pubs.count()); - assertEquals(286, sw_mergerel); - assertEquals(472, ds_mergerel); - assertEquals(738, orp_mergerel); + assertEquals(1112, pubs.count()); + assertEquals(292, sw_mergerel); + assertEquals(476, ds_mergerel); + assertEquals(742, orp_mergerel); // System.out.println("orgs_mergerel = " + orgs_mergerel); // System.out.println("pubs_mergerel = " + pubs_mergerel); // System.out.println("sw_mergerel = " + sw_mergerel); @@ -492,8 +608,8 @@ public class SparkDedupTest implements Serializable { .count(); assertEquals(86, orgs_deduprecord); - assertEquals(68, pubs.count()); - assertEquals(49, sw_deduprecord); + assertEquals(91, pubs.count()); + assertEquals(47, sw_deduprecord); assertEquals(97, ds_deduprecord); assertEquals(92, orp_deduprecord); @@ -629,11 +745,11 @@ public class SparkDedupTest implements Serializable { .distinct() .count(); - assertEquals(902, publications); + assertEquals(925, publications); assertEquals(839, organizations); assertEquals(100, projects); 
assertEquals(100, datasource); - assertEquals(198, softwares); + assertEquals(196, softwares); assertEquals(389, dataset); assertEquals(520, otherresearchproduct); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json index fa889d63b..ff6670f1e 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/ds.curr.conf.json @@ -101,7 +101,8 @@ "type" : "String", "path" : "$.title[?(@.qualifier.classid == 'main title')].value", "length" : 250, - "size" : 5 + "size" : 5, + "clean": "title" }, { "name" : "authors", diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json index b45b6ae83..a4a3761a3 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/orp.curr.conf.json @@ -101,7 +101,8 @@ "type" : "String", "path" : "$.title[?(@.qualifier.classid == 'main title')].value", "length" : 250, - "size" : 5 + "size" : 5, + "clean": "title" }, { "name" : "authors", diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json index 15ebc7a6a..c3a769874 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/pub.curr.conf.json @@ -29,9 +29,8 @@ }, "pace": { "clustering" : [ - { "name" : "ngrampairs", "fields" : [ "title" ], "params" 
: { "max" : "1", "ngramLen" : "3"} }, - { "name" : "suffixprefix", "fields" : [ "title" ], "params" : { "max" : "1", "len" : "3" } }, - { "name" : "lowercase", "fields" : [ "doi" ], "params" : { } } + { "name" : "numAuthorsTitleSuffixPrefixChain", "fields" : [ "num_authors", "title" ], "params" : { "mod" : "10" } }, + { "name" : "jsonlistclustering", "fields" : [ "pid" ], "params" : { "jpath_value": "$.value", "jpath_classid": "$.qualifier.classid"} } ], "decisionTree": { "start": { @@ -79,13 +78,37 @@ "ignoreUndefined": "false" }, "layer3": { + "fields": [ + { + "field": "authors", + "comparator": "authorsMatch", + "weight": 1.0, + "countIfUndefined": "false", + "params": { + "surname_th": 0.75, + "fullname_th": 0.75, + "threshold": 0.6, + "mode": "full" + } + } + ], + "threshold": 0.6, + "aggregation": "MAX", + "positive": "layer4", + "negative": "NO_MATCH", + "undefined": "MATCH", + "ignoreUndefined": "true" + }, + "layer4": { "fields": [ { "field": "title", "comparator": "levensteinTitle", "weight": 1.0, "countIfUndefined": "true", - "params": {} + "params": { + "threshold": "0.99" + } } ], "threshold": 0.99, @@ -97,23 +120,25 @@ } }, "model": [ - { - "name": "doi", - "type": "String", - "path": "$.pid[?(@.qualifier.classid == 'doi')].value" - }, { "name": "pid", "type": "JSON", "path": "$.pid", "overrideMatch": "true" }, + { + "name": "alternateid", + "type": "JSON", + "path": "$.instance[*].alternateIdentifier[*]", + "overrideMatch": "true" + }, { "name": "title", "type": "String", "path": "$.title[?(@.qualifier.classid == 'main title')].value", "length": 250, - "size": 5 + "size": 5, + "clean": "title" }, { "name": "authors", @@ -122,9 +147,9 @@ "size": 200 }, { - "name": "resulttype", + "name": "num_authors", "type": "String", - "path": "$.resulttype.classid" + "path": "$.author.length()" } ], "blacklists": { diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json 
b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json index f53ff385f..3c6c8aa5f 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/sw.curr.conf.json @@ -75,7 +75,8 @@ "type" : "String", "path" : "$.title[?(@.qualifier.classid == 'main title')].value", "length" : 250, - "size" : 5 + "size" : 5, + "clean": "title" }, { "name" : "url", diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/pivot_history/pivot_history.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/pivot_history/pivot_history.json new file mode 100644 index 000000000..8af1a6d06 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/pivot_history/pivot_history.json @@ -0,0 +1 @@ +{"id": "50|arXiv_______::c93aeb433eb90ed7a86e29be00791b7c", "firstUsage": "2022-01-01", "lastUsage": "2022-01-01", "dedupId": "50|arXiv_dedup_::c93aeb433eb90ed7a86e29be00791b7c" } \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala index ee857e2c4..64090733d 100644 --- a/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-doiboost/src/main/scala/eu/dnetlib/doiboost/crossref/Crossref2Oaf.scala @@ -25,7 +25,7 @@ case class mappingAffiliation(name: String) {} case class mappingAuthor( given: Option[String], - family: String, + family: Option[String], sequence: Option[String], ORCID: Option[String], affiliation: Option[mappingAffiliation] @@ -226,14 +226,14 @@ case object Crossref2Oaf { //Mapping Author val authorList: List[mappingAuthor] = - (json \ "author").extractOrElse[List[mappingAuthor]](List()) + (json \ 
"author").extract[List[mappingAuthor]].filter(a => a.family.isDefined) val sorted_list = authorList.sortWith((a: mappingAuthor, b: mappingAuthor) => a.sequence.isDefined && a.sequence.get.equalsIgnoreCase("first") ) result.setAuthor(sorted_list.zipWithIndex.map { case (a, index) => - generateAuhtor(a.given.orNull, a.family, a.ORCID.orNull, index) + generateAuhtor(a.given.orNull, a.family.get, a.ORCID.orNull, index) }.asJava) // Mapping instance diff --git a/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json new file mode 100644 index 000000000..8e75f3586 --- /dev/null +++ b/dhp-workflows/dhp-doiboost/src/test/resources/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json @@ -0,0 +1 @@ +{"indexed":{"date-parts":[[2023,12,29]],"date-time":"2023-12-29T10:40:34Z","timestamp":1703846434800},"reference-count":65,"publisher":"Springer Science and Business Media LLC","license":[{"start":{"date-parts":[[2023,2,9]],"date-time":"2023-02-09T00:00:00Z","timestamp":1675900800000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2023,2,9]],"date-time":"2023-02-09T00:00:00Z","timestamp":1675900800000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":["Nat. 
Phys."],"DOI":"10.1038\/s41567-022-01757-y","type":"journal-article","created":{"date-parts":[[2023,2,10]],"date-time":"2023-02-10T15:32:19Z","timestamp":1676043139000},"update-policy":"http:\/\/dx.doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":1,"title":["Observation of electroweak production of two jets and a Z-boson pair"],"prefix":"10.1038","author":[{"name":"ATLAS Collaboration","sequence":"first","affiliation":[]},{"given":"G.","family":"Aad","sequence":"additional","affiliation":[]},{"given":"B.","family":"Abbott","sequence":"additional","affiliation":[]},{"given":"D. C.","family":"Abbott","sequence":"additional","affiliation":[]},{"given":"A.","family":"Abed Abud","sequence":"additional","affiliation":[]},{"given":"K.","family":"Abeling","sequence":"additional","affiliation":[]},{"given":"D. K.","family":"Abhayasinghe","sequence":"additional","affiliation":[]},{"given":"S. H.","family":"Abidi","sequence":"additional","affiliation":[]},{"given":"O. S.","family":"AbouZeid","sequence":"additional","affiliation":[]},{"given":"N. L.","family":"Abraham","sequence":"additional","affiliation":[]},{"given":"H.","family":"Abramowicz","sequence":"additional","affiliation":[]},{"given":"H.","family":"Abreu","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Abulaiti","sequence":"additional","affiliation":[]},{"given":"B. S.","family":"Acharya","sequence":"additional","affiliation":[]},{"given":"B.","family":"Achkar","sequence":"additional","affiliation":[]},{"given":"S.","family":"Adachi","sequence":"additional","affiliation":[]},{"given":"L.","family":"Adam","sequence":"additional","affiliation":[]},{"given":"C. 
Adam","family":"Bourdarios","sequence":"additional","affiliation":[]},{"given":"L.","family":"Adamczyk","sequence":"additional","affiliation":[]},{"given":"L.","family":"Adamek","sequence":"additional","affiliation":[]},{"given":"J.","family":"Adelman","sequence":"additional","affiliation":[]},{"given":"M.","family":"Adersberger","sequence":"additional","affiliation":[]},{"given":"A.","family":"Adiguzel","sequence":"additional","affiliation":[]},{"given":"S.","family":"Adorni","sequence":"additional","affiliation":[]},{"given":"T.","family":"Adye","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Affolder","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Afik","sequence":"additional","affiliation":[]},{"given":"C.","family":"Agapopoulou","sequence":"additional","affiliation":[]},{"given":"M. N.","family":"Agaras","sequence":"additional","affiliation":[]},{"given":"A.","family":"Aggarwal","sequence":"additional","affiliation":[]},{"given":"C.","family":"Agheorghiesei","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Aguilar-Saavedra","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ahmadov","sequence":"additional","affiliation":[]},{"given":"W. S.","family":"Ahmed","sequence":"additional","affiliation":[]},{"given":"X.","family":"Ai","sequence":"additional","affiliation":[]},{"given":"G.","family":"Aielli","sequence":"additional","affiliation":[]},{"given":"S.","family":"Akatsuka","sequence":"additional","affiliation":[]},{"given":"T. P. A.","family":"\u00c5kesson","sequence":"additional","affiliation":[]},{"given":"E.","family":"Akilli","sequence":"additional","affiliation":[]},{"given":"A. V.","family":"Akimov","sequence":"additional","affiliation":[]},{"given":"K.","family":"Al Khoury","sequence":"additional","affiliation":[]},{"given":"G. L.","family":"Alberghi","sequence":"additional","affiliation":[]},{"given":"J.","family":"Albert","sequence":"additional","affiliation":[]},{"given":"M. 
J.","family":"Alconada Verzini","sequence":"additional","affiliation":[]},{"given":"S.","family":"Alderweireldt","sequence":"additional","affiliation":[]},{"given":"M.","family":"Aleksa","sequence":"additional","affiliation":[]},{"given":"I. N.","family":"Aleksandrov","sequence":"additional","affiliation":[]},{"given":"C.","family":"Alexa","sequence":"additional","affiliation":[]},{"given":"T.","family":"Alexopoulos","sequence":"additional","affiliation":[]},{"given":"A.","family":"Alfonsi","sequence":"additional","affiliation":[]},{"given":"F.","family":"Alfonsi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Alhroob","sequence":"additional","affiliation":[]},{"given":"B.","family":"Ali","sequence":"additional","affiliation":[]},{"given":"M.","family":"Aliev","sequence":"additional","affiliation":[]},{"given":"G.","family":"Alimonti","sequence":"additional","affiliation":[]},{"given":"C.","family":"Allaire","sequence":"additional","affiliation":[]},{"given":"B. M. M.","family":"Allbrooke","sequence":"additional","affiliation":[]},{"given":"B. W.","family":"Allen","sequence":"additional","affiliation":[]},{"given":"P. P.","family":"Allport","sequence":"additional","affiliation":[]},{"given":"A.","family":"Aloisio","sequence":"additional","affiliation":[]},{"given":"F.","family":"Alonso","sequence":"additional","affiliation":[]},{"given":"C.","family":"Alpigiani","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Alshehri","sequence":"additional","affiliation":[]},{"given":"E.","family":"Alunno Camelia","sequence":"additional","affiliation":[]},{"given":"M.","family":"Alvarez Estevez","sequence":"additional","affiliation":[]},{"given":"M. 
G.","family":"Alviggi","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Amaral Coutinho","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ambler","sequence":"additional","affiliation":[]},{"given":"L.","family":"Ambroz","sequence":"additional","affiliation":[]},{"given":"C.","family":"Amelung","sequence":"additional","affiliation":[]},{"given":"D.","family":"Amidei","sequence":"additional","affiliation":[]},{"given":"S. P. Amor","family":"Dos Santos","sequence":"additional","affiliation":[]},{"given":"S.","family":"Amoroso","sequence":"additional","affiliation":[]},{"given":"C. S.","family":"Amrouche","sequence":"additional","affiliation":[]},{"given":"F.","family":"An","sequence":"additional","affiliation":[]},{"given":"C.","family":"Anastopoulos","sequence":"additional","affiliation":[]},{"given":"N.","family":"Andari","sequence":"additional","affiliation":[]},{"given":"T.","family":"Andeen","sequence":"additional","affiliation":[]},{"given":"C. F.","family":"Anders","sequence":"additional","affiliation":[]},{"given":"J. K.","family":"Anders","sequence":"additional","affiliation":[]},{"given":"A.","family":"Andreazza","sequence":"additional","affiliation":[]},{"given":"V.","family":"Andrei","sequence":"additional","affiliation":[]},{"given":"C. R.","family":"Anelli","sequence":"additional","affiliation":[]},{"given":"S.","family":"Angelidakis","sequence":"additional","affiliation":[]},{"given":"A.","family":"Angerami","sequence":"additional","affiliation":[]},{"given":"A. V.","family":"Anisenkov","sequence":"additional","affiliation":[]},{"given":"A.","family":"Annovi","sequence":"additional","affiliation":[]},{"given":"C.","family":"Antel","sequence":"additional","affiliation":[]},{"given":"M. T.","family":"Anthony","sequence":"additional","affiliation":[]},{"given":"E.","family":"Antipov","sequence":"additional","affiliation":[]},{"given":"M.","family":"Antonelli","sequence":"additional","affiliation":[]},{"given":"D. J. 
A.","family":"Antrim","sequence":"additional","affiliation":[]},{"given":"F.","family":"Anulli","sequence":"additional","affiliation":[]},{"given":"M.","family":"Aoki","sequence":"additional","affiliation":[]},{"given":"J. A. Aparisi","family":"Pozo","sequence":"additional","affiliation":[]},{"given":"M. A.","family":"Aparo","sequence":"additional","affiliation":[]},{"given":"L. Aperio","family":"Bella","sequence":"additional","affiliation":[]},{"given":"J. P.","family":"Araque","sequence":"additional","affiliation":[]},{"given":"V. Araujo","family":"Ferraz","sequence":"additional","affiliation":[]},{"given":"R. Araujo","family":"Pereira","sequence":"additional","affiliation":[]},{"given":"C.","family":"Arcangeletti","sequence":"additional","affiliation":[]},{"given":"A. T. H.","family":"Arce","sequence":"additional","affiliation":[]},{"given":"F. A.","family":"Arduh","sequence":"additional","affiliation":[]},{"given":"J-F.","family":"Arguin","sequence":"additional","affiliation":[]},{"given":"S.","family":"Argyropoulos","sequence":"additional","affiliation":[]},{"given":"J.-H.","family":"Arling","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Armbruster","sequence":"additional","affiliation":[]},{"given":"A.","family":"Armstrong","sequence":"additional","affiliation":[]},{"given":"O.","family":"Arnaez","sequence":"additional","affiliation":[]},{"given":"H.","family":"Arnold","sequence":"additional","affiliation":[]},{"given":"Z. P. Arrubarrena","family":"Tame","sequence":"additional","affiliation":[]},{"given":"G.","family":"Artoni","sequence":"additional","affiliation":[]},{"given":"S.","family":"Artz","sequence":"additional","affiliation":[]},{"given":"S.","family":"Asai","sequence":"additional","affiliation":[]},{"given":"T.","family":"Asawatavonvanich","sequence":"additional","affiliation":[]},{"given":"N. A.","family":"Asbah","sequence":"additional","affiliation":[]},{"given":"E. 
M.","family":"Asimakopoulou","sequence":"additional","affiliation":[]},{"given":"L.","family":"Asquith","sequence":"additional","affiliation":[]},{"given":"J.","family":"Assahsah","sequence":"additional","affiliation":[]},{"given":"K.","family":"Assamagan","sequence":"additional","affiliation":[]},{"given":"R.","family":"Astalos","sequence":"additional","affiliation":[]},{"given":"R. J.","family":"Atkin","sequence":"additional","affiliation":[]},{"given":"M.","family":"Atkinson","sequence":"additional","affiliation":[]},{"given":"N. B.","family":"Atlay","sequence":"additional","affiliation":[]},{"given":"H.","family":"Atmani","sequence":"additional","affiliation":[]},{"given":"K.","family":"Augsten","sequence":"additional","affiliation":[]},{"given":"G.","family":"Avolio","sequence":"additional","affiliation":[]},{"given":"M. K.","family":"Ayoub","sequence":"additional","affiliation":[]},{"given":"G.","family":"Azuelos","sequence":"additional","affiliation":[]},{"given":"H.","family":"Bachacou","sequence":"additional","affiliation":[]},{"given":"K.","family":"Bachas","sequence":"additional","affiliation":[]},{"given":"M.","family":"Backes","sequence":"additional","affiliation":[]},{"given":"F.","family":"Backman","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bagnaia","sequence":"additional","affiliation":[]},{"given":"H.","family":"Bahrasemani","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Bailey","sequence":"additional","affiliation":[]},{"given":"V. R.","family":"Bailey","sequence":"additional","affiliation":[]},{"given":"J. T.","family":"Baines","sequence":"additional","affiliation":[]},{"given":"C.","family":"Bakalis","sequence":"additional","affiliation":[]},{"given":"O. K.","family":"Baker","sequence":"additional","affiliation":[]},{"given":"P. J.","family":"Bakker","sequence":"additional","affiliation":[]},{"given":"D. 
Bakshi","family":"Gupta","sequence":"additional","affiliation":[]},{"given":"S.","family":"Balaji","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Baldin","sequence":"additional","affiliation":[]},{"given":"P.","family":"Balek","sequence":"additional","affiliation":[]},{"given":"F.","family":"Balli","sequence":"additional","affiliation":[]},{"given":"W. K.","family":"Balunas","sequence":"additional","affiliation":[]},{"given":"J.","family":"Balz","sequence":"additional","affiliation":[]},{"given":"E.","family":"Banas","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bandyopadhyay","sequence":"additional","affiliation":[]},{"given":"Sw.","family":"Banerjee","sequence":"additional","affiliation":[]},{"given":"A. A. E.","family":"Bannoura","sequence":"additional","affiliation":[]},{"given":"L.","family":"Barak","sequence":"additional","affiliation":[]},{"given":"W. M.","family":"Barbe","sequence":"additional","affiliation":[]},{"given":"E. L.","family":"Barberio","sequence":"additional","affiliation":[]},{"given":"D.","family":"Barberis","sequence":"additional","affiliation":[]},{"given":"M.","family":"Barbero","sequence":"additional","affiliation":[]},{"given":"G.","family":"Barbour","sequence":"additional","affiliation":[]},{"given":"T.","family":"Barillari","sequence":"additional","affiliation":[]},{"given":"M-S.","family":"Barisits","sequence":"additional","affiliation":[]},{"given":"J.","family":"Barkeloo","sequence":"additional","affiliation":[]},{"given":"T.","family":"Barklow","sequence":"additional","affiliation":[]},{"given":"R.","family":"Barnea","sequence":"additional","affiliation":[]},{"given":"B. M.","family":"Barnett","sequence":"additional","affiliation":[]},{"given":"R. 
M.","family":"Barnett","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Barnovska-Blenessy","sequence":"additional","affiliation":[]},{"given":"A.","family":"Baroncelli","sequence":"additional","affiliation":[]},{"given":"G.","family":"Barone","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Barr","sequence":"additional","affiliation":[]},{"given":"L.","family":"Barranco Navarro","sequence":"additional","affiliation":[]},{"given":"F.","family":"Barreiro","sequence":"additional","affiliation":[]},{"given":"J.","family":"Barreiro Guimar\u00e3es da Costa","sequence":"additional","affiliation":[]},{"given":"S.","family":"Barsov","sequence":"additional","affiliation":[]},{"given":"R.","family":"Bartoldus","sequence":"additional","affiliation":[]},{"given":"G.","family":"Bartolini","sequence":"additional","affiliation":[]},{"given":"A. E.","family":"Barton","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bartos","sequence":"additional","affiliation":[]},{"given":"A.","family":"Basalaev","sequence":"additional","affiliation":[]},{"given":"A.","family":"Basan","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bassalat","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Basso","sequence":"additional","affiliation":[]},{"given":"R. L.","family":"Bates","sequence":"additional","affiliation":[]},{"given":"S.","family":"Batlamous","sequence":"additional","affiliation":[]},{"given":"J. R.","family":"Batley","sequence":"additional","affiliation":[]},{"given":"B.","family":"Batool","sequence":"additional","affiliation":[]},{"given":"M.","family":"Battaglia","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bauce","sequence":"additional","affiliation":[]},{"given":"F.","family":"Bauer","sequence":"additional","affiliation":[]},{"given":"K. T.","family":"Bauer","sequence":"additional","affiliation":[]},{"given":"H. 
S.","family":"Bawa","sequence":"additional","affiliation":[]},{"given":"J. B.","family":"Beacham","sequence":"additional","affiliation":[]},{"given":"T.","family":"Beau","sequence":"additional","affiliation":[]},{"given":"P. H.","family":"Beauchemin","sequence":"additional","affiliation":[]},{"given":"F.","family":"Becherer","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bechtle","sequence":"additional","affiliation":[]},{"given":"H. C.","family":"Beck","sequence":"additional","affiliation":[]},{"given":"H. P.","family":"Beck","sequence":"additional","affiliation":[]},{"given":"K.","family":"Becker","sequence":"additional","affiliation":[]},{"given":"C.","family":"Becot","sequence":"additional","affiliation":[]},{"given":"A.","family":"Beddall","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Beddall","sequence":"additional","affiliation":[]},{"given":"V. A.","family":"Bednyakov","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bedognetti","sequence":"additional","affiliation":[]},{"given":"C. P.","family":"Bee","sequence":"additional","affiliation":[]},{"given":"T. A.","family":"Beermann","sequence":"additional","affiliation":[]},{"given":"M.","family":"Begalli","sequence":"additional","affiliation":[]},{"given":"M.","family":"Begel","sequence":"additional","affiliation":[]},{"given":"A.","family":"Behera","sequence":"additional","affiliation":[]},{"given":"J. K.","family":"Behr","sequence":"additional","affiliation":[]},{"given":"F.","family":"Beisiegel","sequence":"additional","affiliation":[]},{"given":"A. 
S.","family":"Bell","sequence":"additional","affiliation":[]},{"given":"G.","family":"Bella","sequence":"additional","affiliation":[]},{"given":"L.","family":"Bellagamba","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bellerive","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bellos","sequence":"additional","affiliation":[]},{"given":"K.","family":"Beloborodov","sequence":"additional","affiliation":[]},{"given":"K.","family":"Belotskiy","sequence":"additional","affiliation":[]},{"given":"N. L.","family":"Belyaev","sequence":"additional","affiliation":[]},{"given":"D.","family":"Benchekroun","sequence":"additional","affiliation":[]},{"given":"N.","family":"Benekos","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Benhammou","sequence":"additional","affiliation":[]},{"given":"D. P.","family":"Benjamin","sequence":"additional","affiliation":[]},{"given":"M.","family":"Benoit","sequence":"additional","affiliation":[]},{"given":"J. R.","family":"Bensinger","sequence":"additional","affiliation":[]},{"given":"S.","family":"Bentvelsen","sequence":"additional","affiliation":[]},{"given":"L.","family":"Beresford","sequence":"additional","affiliation":[]},{"given":"M.","family":"Beretta","sequence":"additional","affiliation":[]},{"given":"D.","family":"Berge","sequence":"additional","affiliation":[]},{"given":"E. Bergeaas","family":"Kuutmann","sequence":"additional","affiliation":[]},{"given":"N.","family":"Berger","sequence":"additional","affiliation":[]},{"given":"B.","family":"Bergmann","sequence":"additional","affiliation":[]},{"given":"L. J.","family":"Bergsten","sequence":"additional","affiliation":[]},{"given":"J.","family":"Beringer","sequence":"additional","affiliation":[]},{"given":"S.","family":"Berlendis","sequence":"additional","affiliation":[]},{"given":"G.","family":"Bernardi","sequence":"additional","affiliation":[]},{"given":"C.","family":"Bernius","sequence":"additional","affiliation":[]},{"given":"F. 
U.","family":"Bernlochner","sequence":"additional","affiliation":[]},{"given":"T.","family":"Berry","sequence":"additional","affiliation":[]},{"given":"P.","family":"Berta","sequence":"additional","affiliation":[]},{"given":"C.","family":"Bertella","sequence":"additional","affiliation":[]},{"given":"I. A.","family":"Bertram","sequence":"additional","affiliation":[]},{"given":"O.","family":"Bessidskaia Bylund","sequence":"additional","affiliation":[]},{"given":"N.","family":"Besson","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bethani","sequence":"additional","affiliation":[]},{"given":"S.","family":"Bethke","sequence":"additional","affiliation":[]},{"given":"A.","family":"Betti","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Bevan","sequence":"additional","affiliation":[]},{"given":"J.","family":"Beyer","sequence":"additional","affiliation":[]},{"given":"D. S.","family":"Bhattacharya","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bhattarai","sequence":"additional","affiliation":[]},{"given":"R.","family":"Bi","sequence":"additional","affiliation":[]},{"given":"R. M.","family":"Bianchi","sequence":"additional","affiliation":[]},{"given":"O.","family":"Biebel","sequence":"additional","affiliation":[]},{"given":"D.","family":"Biedermann","sequence":"additional","affiliation":[]},{"given":"R.","family":"Bielski","sequence":"additional","affiliation":[]},{"given":"K.","family":"Bierwagen","sequence":"additional","affiliation":[]},{"given":"N. V.","family":"Biesuz","sequence":"additional","affiliation":[]},{"given":"M.","family":"Biglietti","sequence":"additional","affiliation":[]},{"given":"T. R. 
V.","family":"Billoud","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bindi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bingul","sequence":"additional","affiliation":[]},{"given":"C.","family":"Bini","sequence":"additional","affiliation":[]},{"given":"S.","family":"Biondi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Birman","sequence":"additional","affiliation":[]},{"given":"T.","family":"Bisanz","sequence":"additional","affiliation":[]},{"given":"J. P.","family":"Biswal","sequence":"additional","affiliation":[]},{"given":"D.","family":"Biswas","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bitadze","sequence":"additional","affiliation":[]},{"given":"C.","family":"Bittrich","sequence":"additional","affiliation":[]},{"given":"K.","family":"Bj\u00f8rke","sequence":"additional","affiliation":[]},{"given":"T.","family":"Blazek","sequence":"additional","affiliation":[]},{"given":"I.","family":"Bloch","sequence":"additional","affiliation":[]},{"given":"C.","family":"Blocker","sequence":"additional","affiliation":[]},{"given":"A.","family":"Blue","sequence":"additional","affiliation":[]},{"given":"U.","family":"Blumenschein","sequence":"additional","affiliation":[]},{"given":"G. J.","family":"Bobbink","sequence":"additional","affiliation":[]},{"given":"V. S.","family":"Bobrovnikov","sequence":"additional","affiliation":[]},{"given":"S. S.","family":"Bocchetta","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bocci","sequence":"additional","affiliation":[]},{"given":"D.","family":"Bogavac","sequence":"additional","affiliation":[]},{"given":"A. 
G.","family":"Bogdanchikov","sequence":"additional","affiliation":[]},{"given":"C.","family":"Bohm","sequence":"additional","affiliation":[]},{"given":"V.","family":"Boisvert","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bokan","sequence":"additional","affiliation":[]},{"given":"T.","family":"Bold","sequence":"additional","affiliation":[]},{"given":"A. E.","family":"Bolz","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bomben","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bona","sequence":"additional","affiliation":[]},{"given":"J. S.","family":"Bonilla","sequence":"additional","affiliation":[]},{"given":"M.","family":"Boonekamp","sequence":"additional","affiliation":[]},{"given":"C. D.","family":"Booth","sequence":"additional","affiliation":[]},{"given":"H. M.","family":"Borecka-Bielska","sequence":"additional","affiliation":[]},{"given":"L. S.","family":"Borgna","sequence":"additional","affiliation":[]},{"given":"A.","family":"Borisov","sequence":"additional","affiliation":[]},{"given":"G.","family":"Borissov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Bortfeldt","sequence":"additional","affiliation":[]},{"given":"D.","family":"Bortoletto","sequence":"additional","affiliation":[]},{"given":"D.","family":"Boscherini","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bosman","sequence":"additional","affiliation":[]},{"given":"J. D. Bossio","family":"Sola","sequence":"additional","affiliation":[]},{"given":"K.","family":"Bouaouda","sequence":"additional","affiliation":[]},{"given":"J.","family":"Boudreau","sequence":"additional","affiliation":[]},{"given":"E. V.","family":"Bouhova-Thacker","sequence":"additional","affiliation":[]},{"given":"D.","family":"Boumediene","sequence":"additional","affiliation":[]},{"given":"S. 
K.","family":"Boutle","sequence":"additional","affiliation":[]},{"given":"A.","family":"Boveia","sequence":"additional","affiliation":[]},{"given":"J.","family":"Boyd","sequence":"additional","affiliation":[]},{"given":"D.","family":"Boye","sequence":"additional","affiliation":[]},{"given":"I. R.","family":"Boyko","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Bozson","sequence":"additional","affiliation":[]},{"given":"J.","family":"Bracinik","sequence":"additional","affiliation":[]},{"given":"N.","family":"Brahimi","sequence":"additional","affiliation":[]},{"given":"G.","family":"Brandt","sequence":"additional","affiliation":[]},{"given":"O.","family":"Brandt","sequence":"additional","affiliation":[]},{"given":"F.","family":"Braren","sequence":"additional","affiliation":[]},{"given":"B.","family":"Brau","sequence":"additional","affiliation":[]},{"given":"J. E.","family":"Brau","sequence":"additional","affiliation":[]},{"given":"W. D. Breaden","family":"Madden","sequence":"additional","affiliation":[]},{"given":"K.","family":"Brendlinger","sequence":"additional","affiliation":[]},{"given":"L.","family":"Brenner","sequence":"additional","affiliation":[]},{"given":"R.","family":"Brenner","sequence":"additional","affiliation":[]},{"given":"S.","family":"Bressler","sequence":"additional","affiliation":[]},{"given":"B.","family":"Brickwedde","sequence":"additional","affiliation":[]},{"given":"D. L.","family":"Briglin","sequence":"additional","affiliation":[]},{"given":"D.","family":"Britton","sequence":"additional","affiliation":[]},{"given":"D.","family":"Britzger","sequence":"additional","affiliation":[]},{"given":"I.","family":"Brock","sequence":"additional","affiliation":[]},{"given":"R.","family":"Brock","sequence":"additional","affiliation":[]},{"given":"G.","family":"Brooijmans","sequence":"additional","affiliation":[]},{"given":"W. 
K.","family":"Brooks","sequence":"additional","affiliation":[]},{"given":"E.","family":"Brost","sequence":"additional","affiliation":[]},{"given":"J. H.","family":"Broughton","sequence":"additional","affiliation":[]},{"given":"P. A.","family":"Bruckman de Renstrom","sequence":"additional","affiliation":[]},{"given":"D.","family":"Bruncko","sequence":"additional","affiliation":[]},{"given":"A.","family":"Bruni","sequence":"additional","affiliation":[]},{"given":"G.","family":"Bruni","sequence":"additional","affiliation":[]},{"given":"L. S.","family":"Bruni","sequence":"additional","affiliation":[]},{"given":"S.","family":"Bruno","sequence":"additional","affiliation":[]},{"given":"M.","family":"Bruschi","sequence":"additional","affiliation":[]},{"given":"N.","family":"Bruscino","sequence":"additional","affiliation":[]},{"given":"P.","family":"Bryant","sequence":"additional","affiliation":[]},{"given":"L.","family":"Bryngemark","sequence":"additional","affiliation":[]},{"given":"T.","family":"Buanes","sequence":"additional","affiliation":[]},{"given":"Q.","family":"Buat","sequence":"additional","affiliation":[]},{"given":"P.","family":"Buchholz","sequence":"additional","affiliation":[]},{"given":"A. G.","family":"Buckley","sequence":"additional","affiliation":[]},{"given":"I. A.","family":"Budagov","sequence":"additional","affiliation":[]},{"given":"M. K.","family":"Bugge","sequence":"additional","affiliation":[]},{"given":"F.","family":"B\u00fchrer","sequence":"additional","affiliation":[]},{"given":"O.","family":"Bulekov","sequence":"additional","affiliation":[]},{"given":"T. J.","family":"Burch","sequence":"additional","affiliation":[]},{"given":"S.","family":"Burdin","sequence":"additional","affiliation":[]},{"given":"C. D.","family":"Burgard","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Burger","sequence":"additional","affiliation":[]},{"given":"B.","family":"Burghgrave","sequence":"additional","affiliation":[]},{"given":"J. T. 
P.","family":"Burr","sequence":"additional","affiliation":[]},{"given":"C. D.","family":"Burton","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Burzynski","sequence":"additional","affiliation":[]},{"given":"V.","family":"B\u00fcscher","sequence":"additional","affiliation":[]},{"given":"E.","family":"Buschmann","sequence":"additional","affiliation":[]},{"given":"P. J.","family":"Bussey","sequence":"additional","affiliation":[]},{"given":"J. M.","family":"Butler","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Buttar","sequence":"additional","affiliation":[]},{"given":"J. M.","family":"Butterworth","sequence":"additional","affiliation":[]},{"given":"P.","family":"Butti","sequence":"additional","affiliation":[]},{"given":"W.","family":"Buttinger","sequence":"additional","affiliation":[]},{"given":"C. J.","family":"Buxo Vazquez","sequence":"additional","affiliation":[]},{"given":"A.","family":"Buzatu","sequence":"additional","affiliation":[]},{"given":"A. R.","family":"Buzykaev","sequence":"additional","affiliation":[]},{"given":"G.","family":"Cabras","sequence":"additional","affiliation":[]},{"given":"S.","family":"Cabrera Urb\u00e1n","sequence":"additional","affiliation":[]},{"given":"D.","family":"Caforio","sequence":"additional","affiliation":[]},{"given":"H.","family":"Cai","sequence":"additional","affiliation":[]},{"given":"V. M. M.","family":"Cairo","sequence":"additional","affiliation":[]},{"given":"O.","family":"Cakir","sequence":"additional","affiliation":[]},{"given":"N.","family":"Calace","sequence":"additional","affiliation":[]},{"given":"P.","family":"Calafiura","sequence":"additional","affiliation":[]},{"given":"A.","family":"Calandri","sequence":"additional","affiliation":[]},{"given":"G.","family":"Calderini","sequence":"additional","affiliation":[]},{"given":"P.","family":"Calfayan","sequence":"additional","affiliation":[]},{"given":"G.","family":"Callea","sequence":"additional","affiliation":[]},{"given":"L. 
P.","family":"Caloba","sequence":"additional","affiliation":[]},{"given":"A.","family":"Caltabiano","sequence":"additional","affiliation":[]},{"given":"S.","family":"Calvente Lopez","sequence":"additional","affiliation":[]},{"given":"D.","family":"Calvet","sequence":"additional","affiliation":[]},{"given":"S.","family":"Calvet","sequence":"additional","affiliation":[]},{"given":"T. P.","family":"Calvet","sequence":"additional","affiliation":[]},{"given":"M.","family":"Calvetti","sequence":"additional","affiliation":[]},{"given":"R.","family":"Camacho Toro","sequence":"additional","affiliation":[]},{"given":"S.","family":"Camarda","sequence":"additional","affiliation":[]},{"given":"D.","family":"Camarero Munoz","sequence":"additional","affiliation":[]},{"given":"P.","family":"Camarri","sequence":"additional","affiliation":[]},{"given":"D.","family":"Cameron","sequence":"additional","affiliation":[]},{"given":"C.","family":"Camincher","sequence":"additional","affiliation":[]},{"given":"S.","family":"Campana","sequence":"additional","affiliation":[]},{"given":"M.","family":"Campanelli","sequence":"additional","affiliation":[]},{"given":"A.","family":"Camplani","sequence":"additional","affiliation":[]},{"given":"A.","family":"Campoverde","sequence":"additional","affiliation":[]},{"given":"V.","family":"Canale","sequence":"additional","affiliation":[]},{"given":"A.","family":"Canesse","sequence":"additional","affiliation":[]},{"given":"M. Cano","family":"Bret","sequence":"additional","affiliation":[]},{"given":"J.","family":"Cantero","sequence":"additional","affiliation":[]},{"given":"T.","family":"Cao","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Cao","sequence":"additional","affiliation":[]},{"given":"M. D. 
M.","family":"Capeans Garrido","sequence":"additional","affiliation":[]},{"given":"M.","family":"Capua","sequence":"additional","affiliation":[]},{"given":"R.","family":"Cardarelli","sequence":"additional","affiliation":[]},{"given":"F.","family":"Cardillo","sequence":"additional","affiliation":[]},{"given":"G.","family":"Carducci","sequence":"additional","affiliation":[]},{"given":"I.","family":"Carli","sequence":"additional","affiliation":[]},{"given":"T.","family":"Carli","sequence":"additional","affiliation":[]},{"given":"G.","family":"Carlino","sequence":"additional","affiliation":[]},{"given":"B. T.","family":"Carlson","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Carlson","sequence":"additional","affiliation":[]},{"given":"L.","family":"Carminati","sequence":"additional","affiliation":[]},{"given":"R. M. D.","family":"Carney","sequence":"additional","affiliation":[]},{"given":"S.","family":"Caron","sequence":"additional","affiliation":[]},{"given":"E.","family":"Carquin","sequence":"additional","affiliation":[]},{"given":"S.","family":"Carr\u00e1","sequence":"additional","affiliation":[]},{"given":"J. W. S.","family":"Carter","sequence":"additional","affiliation":[]},{"given":"M. P.","family":"Casado","sequence":"additional","affiliation":[]},{"given":"A. F.","family":"Casha","sequence":"additional","affiliation":[]},{"given":"R.","family":"Castelijn","sequence":"additional","affiliation":[]},{"given":"F. L.","family":"Castillo","sequence":"additional","affiliation":[]},{"given":"L.","family":"Castillo Garcia","sequence":"additional","affiliation":[]},{"given":"V.","family":"Castillo Gimenez","sequence":"additional","affiliation":[]},{"given":"N. F.","family":"Castro","sequence":"additional","affiliation":[]},{"given":"A.","family":"Catinaccio","sequence":"additional","affiliation":[]},{"given":"J. 
R.","family":"Catmore","sequence":"additional","affiliation":[]},{"given":"A.","family":"Cattai","sequence":"additional","affiliation":[]},{"given":"V.","family":"Cavaliere","sequence":"additional","affiliation":[]},{"given":"E.","family":"Cavallaro","sequence":"additional","affiliation":[]},{"given":"M.","family":"Cavalli-Sforza","sequence":"additional","affiliation":[]},{"given":"V.","family":"Cavasinni","sequence":"additional","affiliation":[]},{"given":"E.","family":"Celebi","sequence":"additional","affiliation":[]},{"given":"L.","family":"Cerda Alberich","sequence":"additional","affiliation":[]},{"given":"K.","family":"Cerny","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"Cerqueira","sequence":"additional","affiliation":[]},{"given":"A.","family":"Cerri","sequence":"additional","affiliation":[]},{"given":"L.","family":"Cerrito","sequence":"additional","affiliation":[]},{"given":"F.","family":"Cerutti","sequence":"additional","affiliation":[]},{"given":"A.","family":"Cervelli","sequence":"additional","affiliation":[]},{"given":"S. A.","family":"Cetin","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Chadi","sequence":"additional","affiliation":[]},{"given":"D.","family":"Chakraborty","sequence":"additional","affiliation":[]},{"given":"J.","family":"Chan","sequence":"additional","affiliation":[]},{"given":"W. S.","family":"Chan","sequence":"additional","affiliation":[]},{"given":"W. Y.","family":"Chan","sequence":"additional","affiliation":[]},{"given":"J. D.","family":"Chapman","sequence":"additional","affiliation":[]},{"given":"B.","family":"Chargeishvili","sequence":"additional","affiliation":[]},{"given":"D. G.","family":"Charlton","sequence":"additional","affiliation":[]},{"given":"T. P.","family":"Charman","sequence":"additional","affiliation":[]},{"given":"C. 
C.","family":"Chau","sequence":"additional","affiliation":[]},{"given":"S.","family":"Che","sequence":"additional","affiliation":[]},{"given":"S.","family":"Chekanov","sequence":"additional","affiliation":[]},{"given":"S. V.","family":"Chekulaev","sequence":"additional","affiliation":[]},{"given":"G. A.","family":"Chelkov","sequence":"additional","affiliation":[]},{"given":"B.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"C.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"C. H.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"H.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"J.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"J.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"J.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"S.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"X.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"Y-H.","family":"Chen","sequence":"additional","affiliation":[]},{"given":"H. C.","family":"Cheng","sequence":"additional","affiliation":[]},{"given":"H. J.","family":"Cheng","sequence":"additional","affiliation":[]},{"given":"A.","family":"Cheplakov","sequence":"additional","affiliation":[]},{"given":"E.","family":"Cheremushkina","sequence":"additional","affiliation":[]},{"given":"R.","family":"Cherkaoui El Moursli","sequence":"additional","affiliation":[]},{"given":"E.","family":"Cheu","sequence":"additional","affiliation":[]},{"given":"K.","family":"Cheung","sequence":"additional","affiliation":[]},{"given":"T. J. 
A.","family":"Cheval\u00e9rias","sequence":"additional","affiliation":[]},{"given":"L.","family":"Chevalier","sequence":"additional","affiliation":[]},{"given":"V.","family":"Chiarella","sequence":"additional","affiliation":[]},{"given":"G.","family":"Chiarelli","sequence":"additional","affiliation":[]},{"given":"G.","family":"Chiodini","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"Chisholm","sequence":"additional","affiliation":[]},{"given":"A.","family":"Chitan","sequence":"additional","affiliation":[]},{"given":"I.","family":"Chiu","sequence":"additional","affiliation":[]},{"given":"Y. H.","family":"Chiu","sequence":"additional","affiliation":[]},{"given":"M. V.","family":"Chizhov","sequence":"additional","affiliation":[]},{"given":"K.","family":"Choi","sequence":"additional","affiliation":[]},{"given":"A. R.","family":"Chomont","sequence":"additional","affiliation":[]},{"given":"S.","family":"Chouridou","sequence":"additional","affiliation":[]},{"given":"E. Y. S.","family":"Chow","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Chu","sequence":"additional","affiliation":[]},{"given":"X.","family":"Chu","sequence":"additional","affiliation":[]},{"given":"J.","family":"Chudoba","sequence":"additional","affiliation":[]},{"given":"J. J.","family":"Chwastowski","sequence":"additional","affiliation":[]},{"given":"L.","family":"Chytka","sequence":"additional","affiliation":[]},{"given":"D.","family":"Cieri","sequence":"additional","affiliation":[]},{"given":"K. M.","family":"Ciesla","sequence":"additional","affiliation":[]},{"given":"D.","family":"Cinca","sequence":"additional","affiliation":[]},{"given":"V.","family":"Cindro","sequence":"additional","affiliation":[]},{"given":"I. A.","family":"Cioar\u0103","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ciocio","sequence":"additional","affiliation":[]},{"given":"F.","family":"Cirotto","sequence":"additional","affiliation":[]},{"given":"Z. 
H.","family":"Citron","sequence":"additional","affiliation":[]},{"given":"M.","family":"Citterio","sequence":"additional","affiliation":[]},{"given":"D. A.","family":"Ciubotaru","sequence":"additional","affiliation":[]},{"given":"B. M.","family":"Ciungu","sequence":"additional","affiliation":[]},{"given":"A.","family":"Clark","sequence":"additional","affiliation":[]},{"given":"M. R.","family":"Clark","sequence":"additional","affiliation":[]},{"given":"P. J.","family":"Clark","sequence":"additional","affiliation":[]},{"given":"C.","family":"Clement","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Coadou","sequence":"additional","affiliation":[]},{"given":"M.","family":"Cobal","sequence":"additional","affiliation":[]},{"given":"A.","family":"Coccaro","sequence":"additional","affiliation":[]},{"given":"J.","family":"Cochran","sequence":"additional","affiliation":[]},{"given":"R.","family":"Coelho Lopes De Sa","sequence":"additional","affiliation":[]},{"given":"H.","family":"Cohen","sequence":"additional","affiliation":[]},{"given":"A. E. C.","family":"Coimbra","sequence":"additional","affiliation":[]},{"given":"B.","family":"Cole","sequence":"additional","affiliation":[]},{"given":"A. P.","family":"Colijn","sequence":"additional","affiliation":[]},{"given":"J.","family":"Collot","sequence":"additional","affiliation":[]},{"given":"P. Conde","family":"Mui\u00f1o","sequence":"additional","affiliation":[]},{"given":"S. H.","family":"Connell","sequence":"additional","affiliation":[]},{"given":"I. A.","family":"Connelly","sequence":"additional","affiliation":[]},{"given":"S.","family":"Constantinescu","sequence":"additional","affiliation":[]},{"given":"F.","family":"Conventi","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Cooper-Sarkar","sequence":"additional","affiliation":[]},{"given":"F.","family":"Cormier","sequence":"additional","affiliation":[]},{"given":"K. J. 
R.","family":"Cormier","sequence":"additional","affiliation":[]},{"given":"L. D.","family":"Corpe","sequence":"additional","affiliation":[]},{"given":"M.","family":"Corradi","sequence":"additional","affiliation":[]},{"given":"E. E.","family":"Corrigan","sequence":"additional","affiliation":[]},{"given":"F.","family":"Corriveau","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Costa","sequence":"additional","affiliation":[]},{"given":"F.","family":"Costanza","sequence":"additional","affiliation":[]},{"given":"D.","family":"Costanzo","sequence":"additional","affiliation":[]},{"given":"G.","family":"Cowan","sequence":"additional","affiliation":[]},{"given":"J. W.","family":"Cowley","sequence":"additional","affiliation":[]},{"given":"J.","family":"Crane","sequence":"additional","affiliation":[]},{"given":"K.","family":"Cranmer","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"Crawley","sequence":"additional","affiliation":[]},{"given":"R. A.","family":"Creager","sequence":"additional","affiliation":[]},{"given":"S.","family":"Cr\u00e9p\u00e9-Renaudin","sequence":"additional","affiliation":[]},{"given":"F.","family":"Crescioli","sequence":"additional","affiliation":[]},{"given":"M.","family":"Cristinziani","sequence":"additional","affiliation":[]},{"given":"V.","family":"Croft","sequence":"additional","affiliation":[]},{"given":"G.","family":"Crosetti","sequence":"additional","affiliation":[]},{"given":"A.","family":"Cueto","sequence":"additional","affiliation":[]},{"given":"T.","family":"Cuhadar Donszelmann","sequence":"additional","affiliation":[]},{"given":"A. R.","family":"Cukierman","sequence":"additional","affiliation":[]},{"given":"W. R.","family":"Cunningham","sequence":"additional","affiliation":[]},{"given":"S.","family":"Czekierda","sequence":"additional","affiliation":[]},{"given":"P.","family":"Czodrowski","sequence":"additional","affiliation":[]},{"given":"M. 
J.","family":"Da Cunha Sargedas De Sousa","sequence":"additional","affiliation":[]},{"given":"J. V.","family":"Da Fonseca Pinto","sequence":"additional","affiliation":[]},{"given":"C.","family":"Da Via","sequence":"additional","affiliation":[]},{"given":"W.","family":"Dabrowski","sequence":"additional","affiliation":[]},{"given":"F.","family":"Dachs","sequence":"additional","affiliation":[]},{"given":"T.","family":"Dado","sequence":"additional","affiliation":[]},{"given":"S.","family":"Dahbi","sequence":"additional","affiliation":[]},{"given":"T.","family":"Dai","sequence":"additional","affiliation":[]},{"given":"C.","family":"Dallapiccola","sequence":"additional","affiliation":[]},{"given":"M.","family":"Dam","sequence":"additional","affiliation":[]},{"given":"G.","family":"D\u2019amen","sequence":"additional","affiliation":[]},{"given":"V.","family":"D\u2019Amico","sequence":"additional","affiliation":[]},{"given":"J.","family":"Damp","sequence":"additional","affiliation":[]},{"given":"J. R.","family":"Dandoy","sequence":"additional","affiliation":[]},{"given":"M. F.","family":"Daneri","sequence":"additional","affiliation":[]},{"given":"N. S.","family":"Dann","sequence":"additional","affiliation":[]},{"given":"M.","family":"Danninger","sequence":"additional","affiliation":[]},{"given":"V.","family":"Dao","sequence":"additional","affiliation":[]},{"given":"G.","family":"Darbo","sequence":"additional","affiliation":[]},{"given":"O.","family":"Dartsi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Dattagupta","sequence":"additional","affiliation":[]},{"given":"T.","family":"Daubney","sequence":"additional","affiliation":[]},{"given":"S.","family":"D\u2019Auria","sequence":"additional","affiliation":[]},{"given":"C.","family":"David","sequence":"additional","affiliation":[]},{"given":"T.","family":"Davidek","sequence":"additional","affiliation":[]},{"given":"D. 
R.","family":"Davis","sequence":"additional","affiliation":[]},{"given":"I.","family":"Dawson","sequence":"additional","affiliation":[]},{"given":"K.","family":"De","sequence":"additional","affiliation":[]},{"given":"R.","family":"De Asmundis","sequence":"additional","affiliation":[]},{"given":"M.","family":"De Beurs","sequence":"additional","affiliation":[]},{"given":"S.","family":"De Castro","sequence":"additional","affiliation":[]},{"given":"S.","family":"De Cecco","sequence":"additional","affiliation":[]},{"given":"N.","family":"De Groot","sequence":"additional","affiliation":[]},{"given":"P.","family":"de Jong","sequence":"additional","affiliation":[]},{"given":"H.","family":"De la Torre","sequence":"additional","affiliation":[]},{"given":"A.","family":"De Maria","sequence":"additional","affiliation":[]},{"given":"D.","family":"De Pedis","sequence":"additional","affiliation":[]},{"given":"A.","family":"De Salvo","sequence":"additional","affiliation":[]},{"given":"U.","family":"De Sanctis","sequence":"additional","affiliation":[]},{"given":"M.","family":"De Santis","sequence":"additional","affiliation":[]},{"given":"A.","family":"De Santo","sequence":"additional","affiliation":[]},{"given":"K.","family":"De Vasconcelos Corga","sequence":"additional","affiliation":[]},{"given":"J. B.","family":"De Vivie De Regie","sequence":"additional","affiliation":[]},{"given":"C.","family":"Debenedetti","sequence":"additional","affiliation":[]},{"given":"D. V.","family":"Dedovich","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Deiana","sequence":"additional","affiliation":[]},{"given":"J.","family":"Del Peso","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Delabat Diaz","sequence":"additional","affiliation":[]},{"given":"D.","family":"Delgove","sequence":"additional","affiliation":[]},{"given":"F.","family":"Deliot","sequence":"additional","affiliation":[]},{"given":"C. 
M.","family":"Delitzsch","sequence":"additional","affiliation":[]},{"given":"M.","family":"Della Pietra","sequence":"additional","affiliation":[]},{"given":"D.","family":"Della Volpe","sequence":"additional","affiliation":[]},{"given":"A.","family":"Dell\u2019Acqua","sequence":"additional","affiliation":[]},{"given":"L.","family":"Dell\u2019Asta","sequence":"additional","affiliation":[]},{"given":"M.","family":"Delmastro","sequence":"additional","affiliation":[]},{"given":"C.","family":"Delporte","sequence":"additional","affiliation":[]},{"given":"P. A.","family":"Delsart","sequence":"additional","affiliation":[]},{"given":"D. A.","family":"DeMarco","sequence":"additional","affiliation":[]},{"given":"S.","family":"Demers","sequence":"additional","affiliation":[]},{"given":"M.","family":"Demichev","sequence":"additional","affiliation":[]},{"given":"G.","family":"Demontigny","sequence":"additional","affiliation":[]},{"given":"S. P.","family":"Denisov","sequence":"additional","affiliation":[]},{"given":"L.","family":"D\u2019Eramo","sequence":"additional","affiliation":[]},{"given":"D.","family":"Derendarz","sequence":"additional","affiliation":[]},{"given":"J. E.","family":"Derkaoui","sequence":"additional","affiliation":[]},{"given":"F.","family":"Derue","sequence":"additional","affiliation":[]},{"given":"P.","family":"Dervan","sequence":"additional","affiliation":[]},{"given":"K.","family":"Desch","sequence":"additional","affiliation":[]},{"given":"C.","family":"Deterre","sequence":"additional","affiliation":[]},{"given":"K.","family":"Dette","sequence":"additional","affiliation":[]},{"given":"C.","family":"Deutsch","sequence":"additional","affiliation":[]},{"given":"M. R.","family":"Devesa","sequence":"additional","affiliation":[]},{"given":"P. O.","family":"Deviveiros","sequence":"additional","affiliation":[]},{"given":"F. 
A.","family":"Di Bello","sequence":"additional","affiliation":[]},{"given":"A.","family":"Di Ciaccio","sequence":"additional","affiliation":[]},{"given":"L.","family":"Di Ciaccio","sequence":"additional","affiliation":[]},{"given":"W. K.","family":"Di Clemente","sequence":"additional","affiliation":[]},{"given":"C.","family":"Di Donato","sequence":"additional","affiliation":[]},{"given":"A.","family":"Di Girolamo","sequence":"additional","affiliation":[]},{"given":"G.","family":"Di Gregorio","sequence":"additional","affiliation":[]},{"given":"B.","family":"Di Micco","sequence":"additional","affiliation":[]},{"given":"R.","family":"Di Nardo","sequence":"additional","affiliation":[]},{"given":"K. F.","family":"Di Petrillo","sequence":"additional","affiliation":[]},{"given":"R.","family":"Di Sipio","sequence":"additional","affiliation":[]},{"given":"C.","family":"Diaconu","sequence":"additional","affiliation":[]},{"given":"F. A.","family":"Dias","sequence":"additional","affiliation":[]},{"given":"T. Dias","family":"Do Vale","sequence":"additional","affiliation":[]},{"given":"M. A.","family":"Diaz","sequence":"additional","affiliation":[]},{"given":"J.","family":"Dickinson","sequence":"additional","affiliation":[]},{"given":"E. B.","family":"Diehl","sequence":"additional","affiliation":[]},{"given":"J.","family":"Dietrich","sequence":"additional","affiliation":[]},{"given":"S.","family":"D\u00edez Cornell","sequence":"additional","affiliation":[]},{"given":"A.","family":"Dimitrievska","sequence":"additional","affiliation":[]},{"given":"W.","family":"Ding","sequence":"additional","affiliation":[]},{"given":"J.","family":"Dingfelder","sequence":"additional","affiliation":[]},{"given":"F.","family":"Dittus","sequence":"additional","affiliation":[]},{"given":"F.","family":"Djama","sequence":"additional","affiliation":[]},{"given":"T.","family":"Djobava","sequence":"additional","affiliation":[]},{"given":"J. 
I.","family":"Djuvsland","sequence":"additional","affiliation":[]},{"given":"M. A. B.","family":"Do Vale","sequence":"additional","affiliation":[]},{"given":"M.","family":"Dobre","sequence":"additional","affiliation":[]},{"given":"D.","family":"Dodsworth","sequence":"additional","affiliation":[]},{"given":"C.","family":"Doglioni","sequence":"additional","affiliation":[]},{"given":"J.","family":"Dolejsi","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Dolezal","sequence":"additional","affiliation":[]},{"given":"M.","family":"Donadelli","sequence":"additional","affiliation":[]},{"given":"B.","family":"Dong","sequence":"additional","affiliation":[]},{"given":"J.","family":"Donini","sequence":"additional","affiliation":[]},{"given":"A.","family":"D\u2019onofrio","sequence":"additional","affiliation":[]},{"given":"M.","family":"D\u2019Onofrio","sequence":"additional","affiliation":[]},{"given":"J.","family":"Dopke","sequence":"additional","affiliation":[]},{"given":"A.","family":"Doria","sequence":"additional","affiliation":[]},{"given":"M. T.","family":"Dova","sequence":"additional","affiliation":[]},{"given":"A. T.","family":"Doyle","sequence":"additional","affiliation":[]},{"given":"E.","family":"Drechsler","sequence":"additional","affiliation":[]},{"given":"E.","family":"Dreyer","sequence":"additional","affiliation":[]},{"given":"T.","family":"Dreyer","sequence":"additional","affiliation":[]},{"given":"A. 
S.","family":"Drobac","sequence":"additional","affiliation":[]},{"given":"D.","family":"Du","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Duan","sequence":"additional","affiliation":[]},{"given":"F.","family":"Dubinin","sequence":"additional","affiliation":[]},{"given":"M.","family":"Dubovsky","sequence":"additional","affiliation":[]},{"given":"A.","family":"Dubreuil","sequence":"additional","affiliation":[]},{"given":"E.","family":"Duchovni","sequence":"additional","affiliation":[]},{"given":"G.","family":"Duckeck","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ducourthial","sequence":"additional","affiliation":[]},{"given":"O. A.","family":"Ducu","sequence":"additional","affiliation":[]},{"given":"D.","family":"Duda","sequence":"additional","affiliation":[]},{"given":"A.","family":"Dudarev","sequence":"additional","affiliation":[]},{"given":"A. C.","family":"Dudder","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Duffield","sequence":"additional","affiliation":[]},{"given":"L.","family":"Duflot","sequence":"additional","affiliation":[]},{"given":"M.","family":"D\u00fchrssen","sequence":"additional","affiliation":[]},{"given":"C.","family":"D\u00fclsen","sequence":"additional","affiliation":[]},{"given":"lsen M.","family":"Dumancic","sequence":"additional","affiliation":[]},{"given":"A. E.","family":"Dumitriu","sequence":"additional","affiliation":[]},{"given":"A. K.","family":"Duncan","sequence":"additional","affiliation":[]},{"given":"M.","family":"Dunford","sequence":"additional","affiliation":[]},{"given":"A.","family":"Duperrin","sequence":"additional","affiliation":[]},{"given":"H. 
Duran","family":"Yildiz","sequence":"additional","affiliation":[]},{"given":"M.","family":"D\u00fcren","sequence":"additional","affiliation":[]},{"given":"A.","family":"Durglishvili","sequence":"additional","affiliation":[]},{"given":"D.","family":"Duschinger","sequence":"additional","affiliation":[]},{"given":"B.","family":"Dutta","sequence":"additional","affiliation":[]},{"given":"D.","family":"Duvnjak","sequence":"additional","affiliation":[]},{"given":"B. L.","family":"Dwyer","sequence":"additional","affiliation":[]},{"given":"G. I.","family":"Dyckes","sequence":"additional","affiliation":[]},{"given":"M.","family":"Dyndal","sequence":"additional","affiliation":[]},{"given":"S.","family":"Dysch","sequence":"additional","affiliation":[]},{"given":"B. S.","family":"Dziedzic","sequence":"additional","affiliation":[]},{"given":"K. M.","family":"Ecker","sequence":"additional","affiliation":[]},{"given":"M. G.","family":"Eggleston","sequence":"additional","affiliation":[]},{"given":"T.","family":"Eifert","sequence":"additional","affiliation":[]},{"given":"G.","family":"Eigen","sequence":"additional","affiliation":[]},{"given":"K.","family":"Einsweiler","sequence":"additional","affiliation":[]},{"given":"T.","family":"Ekelof","sequence":"additional","affiliation":[]},{"given":"H.","family":"El Jarrari","sequence":"additional","affiliation":[]},{"given":"R.","family":"El Kosseifi","sequence":"additional","affiliation":[]},{"given":"V.","family":"Ellajosyula","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ellert","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ellinghaus","sequence":"additional","affiliation":[]},{"given":"A. 
A.","family":"Elliot","sequence":"additional","affiliation":[]},{"given":"N.","family":"Ellis","sequence":"additional","affiliation":[]},{"given":"J.","family":"Elmsheuser","sequence":"additional","affiliation":[]},{"given":"M.","family":"Elsing","sequence":"additional","affiliation":[]},{"given":"D.","family":"Emeliyanov","sequence":"additional","affiliation":[]},{"given":"A.","family":"Emerman","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Enari","sequence":"additional","affiliation":[]},{"given":"M. B.","family":"Epland","sequence":"additional","affiliation":[]},{"given":"J.","family":"Erdmann","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ereditato","sequence":"additional","affiliation":[]},{"given":"P. A.","family":"Erland","sequence":"additional","affiliation":[]},{"given":"M.","family":"Errenst","sequence":"additional","affiliation":[]},{"given":"M.","family":"Escalier","sequence":"additional","affiliation":[]},{"given":"C.","family":"Escobar","sequence":"additional","affiliation":[]},{"given":"O.","family":"Estrada Pastor","sequence":"additional","affiliation":[]},{"given":"E.","family":"Etzion","sequence":"additional","affiliation":[]},{"given":"H.","family":"Evans","sequence":"additional","affiliation":[]},{"given":"M. O.","family":"Evans","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ezhilov","sequence":"additional","affiliation":[]},{"given":"F.","family":"Fabbri","sequence":"additional","affiliation":[]},{"given":"L.","family":"Fabbri","sequence":"additional","affiliation":[]},{"given":"V.","family":"Fabiani","sequence":"additional","affiliation":[]},{"given":"G.","family":"Facini","sequence":"additional","affiliation":[]},{"given":"R. M.","family":"Faisca Rodrigues Pereira","sequence":"additional","affiliation":[]},{"given":"R. M.","family":"Fakhrutdinov","sequence":"additional","affiliation":[]},{"given":"S.","family":"Falciano","sequence":"additional","affiliation":[]},{"given":"P. 
J.","family":"Falke","sequence":"additional","affiliation":[]},{"given":"S.","family":"Falke","sequence":"additional","affiliation":[]},{"given":"J.","family":"Faltova","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Fang","sequence":"additional","affiliation":[]},{"given":"G.","family":"Fanourakis","sequence":"additional","affiliation":[]},{"given":"M.","family":"Fanti","sequence":"additional","affiliation":[]},{"given":"M.","family":"Faraj","sequence":"additional","affiliation":[]},{"given":"A.","family":"Farbin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Farilla","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Farina","sequence":"additional","affiliation":[]},{"given":"T.","family":"Farooque","sequence":"additional","affiliation":[]},{"given":"S. M.","family":"Farrington","sequence":"additional","affiliation":[]},{"given":"P.","family":"Farthouat","sequence":"additional","affiliation":[]},{"given":"F.","family":"Fassi","sequence":"additional","affiliation":[]},{"given":"P.","family":"Fassnacht","sequence":"additional","affiliation":[]},{"given":"D.","family":"Fassouliotis","sequence":"additional","affiliation":[]},{"given":"M.","family":"Faucci Giannelli","sequence":"additional","affiliation":[]},{"given":"W. J.","family":"Fawcett","sequence":"additional","affiliation":[]},{"given":"L.","family":"Fayard","sequence":"additional","affiliation":[]},{"given":"O. L.","family":"Fedin","sequence":"additional","affiliation":[]},{"given":"W.","family":"Fedorko","sequence":"additional","affiliation":[]},{"given":"M.","family":"Feickert","sequence":"additional","affiliation":[]},{"given":"L.","family":"Feligioni","sequence":"additional","affiliation":[]},{"given":"A.","family":"Fell","sequence":"additional","affiliation":[]},{"given":"C.","family":"Feng","sequence":"additional","affiliation":[]},{"given":"M.","family":"Feng","sequence":"additional","affiliation":[]},{"given":"M. 
J.","family":"Fenton","sequence":"additional","affiliation":[]},{"given":"A. B.","family":"Fenyuk","sequence":"additional","affiliation":[]},{"given":"S. W.","family":"Ferguson","sequence":"additional","affiliation":[]},{"given":"J.","family":"Ferrando","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ferrante","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ferrari","sequence":"additional","affiliation":[]},{"given":"P.","family":"Ferrari","sequence":"additional","affiliation":[]},{"given":"R.","family":"Ferrari","sequence":"additional","affiliation":[]},{"given":"D. E.","family":"Ferreira de Lima","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ferrer","sequence":"additional","affiliation":[]},{"given":"D.","family":"Ferrere","sequence":"additional","affiliation":[]},{"given":"C.","family":"Ferretti","sequence":"additional","affiliation":[]},{"given":"F.","family":"Fiedler","sequence":"additional","affiliation":[]},{"given":"A.","family":"Filip\u010di\u010d","sequence":"additional","affiliation":[]},{"given":"F.","family":"Filthaut","sequence":"additional","affiliation":[]},{"given":"K. D.","family":"Finelli","sequence":"additional","affiliation":[]},{"given":"M. C. N.","family":"Fiolhais","sequence":"additional","affiliation":[]},{"given":"L.","family":"Fiorini","sequence":"additional","affiliation":[]},{"given":"F.","family":"Fischer","sequence":"additional","affiliation":[]},{"given":"W. C.","family":"Fisher","sequence":"additional","affiliation":[]},{"given":"I.","family":"Fleck","sequence":"additional","affiliation":[]},{"given":"P.","family":"Fleischmann","sequence":"additional","affiliation":[]},{"given":"T.","family":"Flick","sequence":"additional","affiliation":[]},{"given":"B. M.","family":"Flierl","sequence":"additional","affiliation":[]},{"given":"L.","family":"Flores","sequence":"additional","affiliation":[]},{"given":"L. 
R.","family":"Flores Castillo","sequence":"additional","affiliation":[]},{"given":"F. M.","family":"Follega","sequence":"additional","affiliation":[]},{"given":"N.","family":"Fomin","sequence":"additional","affiliation":[]},{"given":"J. H.","family":"Foo","sequence":"additional","affiliation":[]},{"given":"G. T.","family":"Forcolin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Formica","sequence":"additional","affiliation":[]},{"given":"F. A.","family":"F\u00f6rster","sequence":"additional","affiliation":[]},{"given":"A. C.","family":"Forti","sequence":"additional","affiliation":[]},{"given":"A. G.","family":"Foster","sequence":"additional","affiliation":[]},{"given":"M. G.","family":"Foti","sequence":"additional","affiliation":[]},{"given":"D.","family":"Fournier","sequence":"additional","affiliation":[]},{"given":"H.","family":"Fox","sequence":"additional","affiliation":[]},{"given":"P.","family":"Francavilla","sequence":"additional","affiliation":[]},{"given":"S.","family":"Francescato","sequence":"additional","affiliation":[]},{"given":"M.","family":"Franchini","sequence":"additional","affiliation":[]},{"given":"S.","family":"Franchino","sequence":"additional","affiliation":[]},{"given":"D.","family":"Francis","sequence":"additional","affiliation":[]},{"given":"L.","family":"Franconi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Franklin","sequence":"additional","affiliation":[]},{"given":"A. N.","family":"Fray","sequence":"additional","affiliation":[]},{"given":"P. M.","family":"Freeman","sequence":"additional","affiliation":[]},{"given":"B.","family":"Freund","sequence":"additional","affiliation":[]},{"given":"W. S.","family":"Freund","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Freundlich","sequence":"additional","affiliation":[]},{"given":"D. 
C.","family":"Frizzell","sequence":"additional","affiliation":[]},{"given":"D.","family":"Froidevaux","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Frost","sequence":"additional","affiliation":[]},{"given":"C.","family":"Fukunaga","sequence":"additional","affiliation":[]},{"given":"E.","family":"Fullana Torregrosa","sequence":"additional","affiliation":[]},{"given":"T.","family":"Fusayasu","sequence":"additional","affiliation":[]},{"given":"J.","family":"Fuster","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gabrielli","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gabrielli","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gadatsch","sequence":"additional","affiliation":[]},{"given":"P.","family":"Gadow","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gagliardi","sequence":"additional","affiliation":[]},{"given":"L. G.","family":"Gagnon","sequence":"additional","affiliation":[]},{"given":"B.","family":"Galhardo","sequence":"additional","affiliation":[]},{"given":"G. E.","family":"Gallardo","sequence":"additional","affiliation":[]},{"given":"E. J.","family":"Gallas","sequence":"additional","affiliation":[]},{"given":"B. J.","family":"Gallop","sequence":"additional","affiliation":[]},{"given":"G.","family":"Galster","sequence":"additional","affiliation":[]},{"given":"R.","family":"Gamboa Goni","sequence":"additional","affiliation":[]},{"given":"K. K.","family":"Gan","sequence":"additional","affiliation":[]},{"given":"S.","family":"Ganguly","sequence":"additional","affiliation":[]},{"given":"J.","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Gao","sequence":"additional","affiliation":[]},{"given":"Y. S.","family":"Gao","sequence":"additional","affiliation":[]},{"given":"C.","family":"Garc\u00eda","sequence":"additional","affiliation":[]},{"given":"J. E.","family":"Garc\u00eda Navarro","sequence":"additional","affiliation":[]},{"given":"J. 
A.","family":"Garc\u00eda Pascual","sequence":"additional","affiliation":[]},{"given":"C.","family":"Garcia-Argos","sequence":"additional","affiliation":[]},{"given":"M.","family":"Garcia-Sciveres","sequence":"additional","affiliation":[]},{"given":"R. W.","family":"Gardner","sequence":"additional","affiliation":[]},{"given":"N.","family":"Garelli","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gargiulo","sequence":"additional","affiliation":[]},{"given":"C. A.","family":"Garner","sequence":"additional","affiliation":[]},{"given":"V.","family":"Garonne","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"Gasiorowski","sequence":"additional","affiliation":[]},{"given":"P.","family":"Gaspar","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gaudiello","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gaudio","sequence":"additional","affiliation":[]},{"given":"I. L.","family":"Gavrilenko","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gavrilyuk","sequence":"additional","affiliation":[]},{"given":"C.","family":"Gay","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gaycken","sequence":"additional","affiliation":[]},{"given":"E. N.","family":"Gazis","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Geanta","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Gee","sequence":"additional","affiliation":[]},{"given":"C. N. P.","family":"Gee","sequence":"additional","affiliation":[]},{"given":"J.","family":"Geisen","sequence":"additional","affiliation":[]},{"given":"M.","family":"Geisen","sequence":"additional","affiliation":[]},{"given":"C.","family":"Gemme","sequence":"additional","affiliation":[]},{"given":"M. 
H.","family":"Genest","sequence":"additional","affiliation":[]},{"given":"C.","family":"Geng","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gentile","sequence":"additional","affiliation":[]},{"given":"S.","family":"George","sequence":"additional","affiliation":[]},{"given":"T.","family":"Geralis","sequence":"additional","affiliation":[]},{"given":"L. O.","family":"Gerlach","sequence":"additional","affiliation":[]},{"given":"P.","family":"Gessinger-Befurt","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gessner","sequence":"additional","affiliation":[]},{"given":"S.","family":"Ghasemi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ghasemi Bostanabad","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ghneimat","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ghosh","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ghosh","sequence":"additional","affiliation":[]},{"given":"B.","family":"Giacobbe","sequence":"additional","affiliation":[]},{"given":"S.","family":"Giagu","sequence":"additional","affiliation":[]},{"given":"N.","family":"Giangiacomi","sequence":"additional","affiliation":[]},{"given":"P.","family":"Giannetti","sequence":"additional","affiliation":[]},{"given":"A.","family":"Giannini","sequence":"additional","affiliation":[]},{"given":"G.","family":"Giannini","sequence":"additional","affiliation":[]},{"given":"S. M.","family":"Gibson","sequence":"additional","affiliation":[]},{"given":"M.","family":"Gignac","sequence":"additional","affiliation":[]},{"given":"D.","family":"Gillberg","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gilles","sequence":"additional","affiliation":[]},{"given":"D. M.","family":"Gingrich","sequence":"additional","affiliation":[]},{"given":"M. P.","family":"Giordani","sequence":"additional","affiliation":[]},{"given":"P. 
F.","family":"Giraud","sequence":"additional","affiliation":[]},{"given":"G.","family":"Giugliarelli","sequence":"additional","affiliation":[]},{"given":"D.","family":"Giugni","sequence":"additional","affiliation":[]},{"given":"F.","family":"Giuli","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gkaitatzis","sequence":"additional","affiliation":[]},{"given":"I.","family":"Gkialas","sequence":"additional","affiliation":[]},{"given":"E. L.","family":"Gkougkousis","sequence":"additional","affiliation":[]},{"given":"P.","family":"Gkountoumis","sequence":"additional","affiliation":[]},{"given":"L. K.","family":"Gladilin","sequence":"additional","affiliation":[]},{"given":"C.","family":"Glasman","sequence":"additional","affiliation":[]},{"given":"J.","family":"Glatzer","sequence":"additional","affiliation":[]},{"given":"P. C. F.","family":"Glaysher","sequence":"additional","affiliation":[]},{"given":"A.","family":"Glazov","sequence":"additional","affiliation":[]},{"given":"G. 
R.","family":"Gledhill","sequence":"additional","affiliation":[]},{"given":"I.","family":"Gnesi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Goblirsch-Kolb","sequence":"additional","affiliation":[]},{"given":"D.","family":"Godin","sequence":"additional","affiliation":[]},{"given":"S.","family":"Goldfarb","sequence":"additional","affiliation":[]},{"given":"T.","family":"Golling","sequence":"additional","affiliation":[]},{"given":"D.","family":"Golubkov","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gomes","sequence":"additional","affiliation":[]},{"given":"R.","family":"Goncalves Gama","sequence":"additional","affiliation":[]},{"given":"R.","family":"Gon\u00e7alo","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gonella","sequence":"additional","affiliation":[]},{"given":"L.","family":"Gonella","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gongadze","sequence":"additional","affiliation":[]},{"given":"F.","family":"Gonnella","sequence":"additional","affiliation":[]},{"given":"J. L.","family":"Gonski","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gonz\u00e1lez de la Hoz","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gonzalez Fernandez","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gonzalez-Sevilla","sequence":"additional","affiliation":[]},{"given":"G. R.","family":"Gonzalvo Rodriguez","sequence":"additional","affiliation":[]},{"given":"L.","family":"Goossens","sequence":"additional","affiliation":[]},{"given":"N. A.","family":"Gorasia","sequence":"additional","affiliation":[]},{"given":"P. A.","family":"Gorbounov","sequence":"additional","affiliation":[]},{"given":"H. 
A.","family":"Gordon","sequence":"additional","affiliation":[]},{"given":"B.","family":"Gorini","sequence":"additional","affiliation":[]},{"given":"E.","family":"Gorini","sequence":"additional","affiliation":[]},{"given":"A.","family":"Gori\u0161ek","sequence":"additional","affiliation":[]},{"given":"A. T.","family":"Goshaw","sequence":"additional","affiliation":[]},{"given":"M. I.","family":"Gostkin","sequence":"additional","affiliation":[]},{"given":"C. A.","family":"Gottardo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Gouighri","sequence":"additional","affiliation":[]},{"given":"A. G.","family":"Goussiou","sequence":"additional","affiliation":[]},{"given":"N.","family":"Govender","sequence":"additional","affiliation":[]},{"given":"C.","family":"Goy","sequence":"additional","affiliation":[]},{"given":"E.","family":"Gozani","sequence":"additional","affiliation":[]},{"given":"I.","family":"Grabowska-Bold","sequence":"additional","affiliation":[]},{"given":"E. C.","family":"Graham","sequence":"additional","affiliation":[]},{"given":"J.","family":"Gramling","sequence":"additional","affiliation":[]},{"given":"E.","family":"Gramstad","sequence":"additional","affiliation":[]},{"given":"S.","family":"Grancagnolo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Grandi","sequence":"additional","affiliation":[]},{"given":"V.","family":"Gratchev","sequence":"additional","affiliation":[]},{"given":"P. M.","family":"Gravila","sequence":"additional","affiliation":[]},{"given":"F. G.","family":"Gravili","sequence":"additional","affiliation":[]},{"given":"C.","family":"Gray","sequence":"additional","affiliation":[]},{"given":"H. M.","family":"Gray","sequence":"additional","affiliation":[]},{"given":"C.","family":"Grefe","sequence":"additional","affiliation":[]},{"given":"K.","family":"Gregersen","sequence":"additional","affiliation":[]},{"given":"I. 
M.","family":"Gregor","sequence":"additional","affiliation":[]},{"given":"P.","family":"Grenier","sequence":"additional","affiliation":[]},{"given":"K.","family":"Grevtsov","sequence":"additional","affiliation":[]},{"given":"C.","family":"Grieco","sequence":"additional","affiliation":[]},{"given":"N. A.","family":"Grieser","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Grillo","sequence":"additional","affiliation":[]},{"given":"K.","family":"Grimm","sequence":"additional","affiliation":[]},{"given":"S.","family":"Grinstein","sequence":"additional","affiliation":[]},{"given":"J.-F.","family":"Grivaz","sequence":"additional","affiliation":[]},{"given":"S.","family":"Groh","sequence":"additional","affiliation":[]},{"given":"E.","family":"Gross","sequence":"additional","affiliation":[]},{"given":"J.","family":"Grosse-Knetter","sequence":"additional","affiliation":[]},{"given":"Z. J.","family":"Grout","sequence":"additional","affiliation":[]},{"given":"C.","family":"Grud","sequence":"additional","affiliation":[]},{"given":"A.","family":"Grummer","sequence":"additional","affiliation":[]},{"given":"L.","family":"Guan","sequence":"additional","affiliation":[]},{"given":"W.","family":"Guan","sequence":"additional","affiliation":[]},{"given":"C.","family":"Gubbels","sequence":"additional","affiliation":[]},{"given":"J.","family":"Guenther","sequence":"additional","affiliation":[]},{"given":"A.","family":"Guerguichon","sequence":"additional","affiliation":[]},{"given":"J. G. 
R.","family":"Guerrero Rojas","sequence":"additional","affiliation":[]},{"given":"F.","family":"Guescini","sequence":"additional","affiliation":[]},{"given":"D.","family":"Guest","sequence":"additional","affiliation":[]},{"given":"R.","family":"Gugel","sequence":"additional","affiliation":[]},{"given":"T.","family":"Guillemin","sequence":"additional","affiliation":[]},{"given":"S.","family":"Guindon","sequence":"additional","affiliation":[]},{"given":"U.","family":"Gul","sequence":"additional","affiliation":[]},{"given":"J.","family":"Guo","sequence":"additional","affiliation":[]},{"given":"W.","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Guo","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Guo","sequence":"additional","affiliation":[]},{"given":"R.","family":"Gupta","sequence":"additional","affiliation":[]},{"given":"S.","family":"Gurbuz","sequence":"additional","affiliation":[]},{"given":"G.","family":"Gustavino","sequence":"additional","affiliation":[]},{"given":"M.","family":"Guth","sequence":"additional","affiliation":[]},{"given":"P.","family":"Gutierrez","sequence":"additional","affiliation":[]},{"given":"C.","family":"Gutschow","sequence":"additional","affiliation":[]},{"given":"C.","family":"Guyot","sequence":"additional","affiliation":[]},{"given":"C.","family":"Gwenlan","sequence":"additional","affiliation":[]},{"given":"C. B.","family":"Gwilliam","sequence":"additional","affiliation":[]},{"given":"A.","family":"Haas","sequence":"additional","affiliation":[]},{"given":"C.","family":"Haber","sequence":"additional","affiliation":[]},{"given":"H. 
K.","family":"Hadavand","sequence":"additional","affiliation":[]},{"given":"A.","family":"Hadef","sequence":"additional","affiliation":[]},{"given":"M.","family":"Haleem","sequence":"additional","affiliation":[]},{"given":"J.","family":"Haley","sequence":"additional","affiliation":[]},{"given":"G.","family":"Halladjian","sequence":"additional","affiliation":[]},{"given":"G. D.","family":"Hallewell","sequence":"additional","affiliation":[]},{"given":"K.","family":"Hamacher","sequence":"additional","affiliation":[]},{"given":"P.","family":"Hamal","sequence":"additional","affiliation":[]},{"given":"K.","family":"Hamano","sequence":"additional","affiliation":[]},{"given":"H.","family":"Hamdaoui","sequence":"additional","affiliation":[]},{"given":"M.","family":"Hamer","sequence":"additional","affiliation":[]},{"given":"G. N.","family":"Hamity","sequence":"additional","affiliation":[]},{"given":"K.","family":"Han","sequence":"additional","affiliation":[]},{"given":"L.","family":"Han","sequence":"additional","affiliation":[]},{"given":"S.","family":"Han","sequence":"additional","affiliation":[]},{"given":"Y. F.","family":"Han","sequence":"additional","affiliation":[]},{"given":"K.","family":"Hanagaki","sequence":"additional","affiliation":[]},{"given":"M.","family":"Hance","sequence":"additional","affiliation":[]},{"given":"D. M.","family":"Handl","sequence":"additional","affiliation":[]},{"given":"B.","family":"Haney","sequence":"additional","affiliation":[]},{"given":"R.","family":"Hankache","sequence":"additional","affiliation":[]},{"given":"E.","family":"Hansen","sequence":"additional","affiliation":[]},{"given":"J. B.","family":"Hansen","sequence":"additional","affiliation":[]},{"given":"J. D.","family":"Hansen","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Hansen","sequence":"additional","affiliation":[]},{"given":"P. H.","family":"Hansen","sequence":"additional","affiliation":[]},{"given":"E. 
C.","family":"Hanson","sequence":"additional","affiliation":[]},{"given":"K.","family":"Hara","sequence":"additional","affiliation":[]},{"given":"T.","family":"Harenberg","sequence":"additional","affiliation":[]},{"given":"S.","family":"Harkusha","sequence":"additional","affiliation":[]},{"given":"P. F.","family":"Harrison","sequence":"additional","affiliation":[]},{"given":"N. M.","family":"Hartmann","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Hasegawa","sequence":"additional","affiliation":[]},{"given":"A.","family":"Hasib","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hassani","sequence":"additional","affiliation":[]},{"given":"S.","family":"Haug","sequence":"additional","affiliation":[]},{"given":"R.","family":"Hauser","sequence":"additional","affiliation":[]},{"given":"L. B.","family":"Havener","sequence":"additional","affiliation":[]},{"given":"M.","family":"Havranek","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Hawkes","sequence":"additional","affiliation":[]},{"given":"R. J.","family":"Hawkings","sequence":"additional","affiliation":[]},{"given":"D.","family":"Hayden","sequence":"additional","affiliation":[]},{"given":"C.","family":"Hayes","sequence":"additional","affiliation":[]},{"given":"R. L.","family":"Hayes","sequence":"additional","affiliation":[]},{"given":"C. P.","family":"Hays","sequence":"additional","affiliation":[]},{"given":"J. M.","family":"Hays","sequence":"additional","affiliation":[]},{"given":"H. S.","family":"Hayward","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"Haywood","sequence":"additional","affiliation":[]},{"given":"F.","family":"He","sequence":"additional","affiliation":[]},{"given":"M. P.","family":"Heath","sequence":"additional","affiliation":[]},{"given":"V.","family":"Hedberg","sequence":"additional","affiliation":[]},{"given":"S.","family":"Heer","sequence":"additional","affiliation":[]},{"given":"K. 
K.","family":"Heidegger","sequence":"additional","affiliation":[]},{"given":"W. D.","family":"Heidorn","sequence":"additional","affiliation":[]},{"given":"J.","family":"Heilman","sequence":"additional","affiliation":[]},{"given":"S.","family":"Heim","sequence":"additional","affiliation":[]},{"given":"T.","family":"Heim","sequence":"additional","affiliation":[]},{"given":"B.","family":"Heinemann","sequence":"additional","affiliation":[]},{"given":"J. J.","family":"Heinrich","sequence":"additional","affiliation":[]},{"given":"L.","family":"Heinrich","sequence":"additional","affiliation":[]},{"given":"J.","family":"Hejbal","sequence":"additional","affiliation":[]},{"given":"L.","family":"Helary","sequence":"additional","affiliation":[]},{"given":"A.","family":"Held","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hellesund","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Helling","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hellman","sequence":"additional","affiliation":[]},{"given":"C.","family":"Helsens","sequence":"additional","affiliation":[]},{"given":"R. C. W.","family":"Henderson","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Heng","sequence":"additional","affiliation":[]},{"given":"L.","family":"Henkelmann","sequence":"additional","affiliation":[]},{"given":"S.","family":"Henkelmann","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Henriques Correia","sequence":"additional","affiliation":[]},{"given":"H.","family":"Herde","sequence":"additional","affiliation":[]},{"given":"V.","family":"Herget","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Hern\u00e1ndez Jim\u00e9nez","sequence":"additional","affiliation":[]},{"given":"H.","family":"Herr","sequence":"additional","affiliation":[]},{"given":"M. 
G.","family":"Herrmann","sequence":"additional","affiliation":[]},{"given":"T.","family":"Herrmann","sequence":"additional","affiliation":[]},{"given":"G.","family":"Herten","sequence":"additional","affiliation":[]},{"given":"R.","family":"Hertenberger","sequence":"additional","affiliation":[]},{"given":"L.","family":"Hervas","sequence":"additional","affiliation":[]},{"given":"T. C.","family":"Herwig","sequence":"additional","affiliation":[]},{"given":"G. G.","family":"Hesketh","sequence":"additional","affiliation":[]},{"given":"N. P.","family":"Hessey","sequence":"additional","affiliation":[]},{"given":"A.","family":"Higashida","sequence":"additional","affiliation":[]},{"given":"S.","family":"Higashino","sequence":"additional","affiliation":[]},{"given":"E.","family":"Hig\u00f3n-Rodriguez","sequence":"additional","affiliation":[]},{"given":"K.","family":"Hildebrand","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Hill","sequence":"additional","affiliation":[]},{"given":"K. K.","family":"Hill","sequence":"additional","affiliation":[]},{"given":"K. H.","family":"Hiller","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"Hillier","sequence":"additional","affiliation":[]},{"given":"M.","family":"Hils","sequence":"additional","affiliation":[]},{"given":"I.","family":"Hinchliffe","sequence":"additional","affiliation":[]},{"given":"F.","family":"Hinterkeuser","sequence":"additional","affiliation":[]},{"given":"M.","family":"Hirose","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hirose","sequence":"additional","affiliation":[]},{"given":"D.","family":"Hirschbuehl","sequence":"additional","affiliation":[]},{"given":"B.","family":"Hiti","sequence":"additional","affiliation":[]},{"given":"O.","family":"Hladik","sequence":"additional","affiliation":[]},{"given":"D. 
R.","family":"Hlaluku","sequence":"additional","affiliation":[]},{"given":"J.","family":"Hobbs","sequence":"additional","affiliation":[]},{"given":"N.","family":"Hod","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Hodgkinson","sequence":"additional","affiliation":[]},{"given":"A.","family":"Hoecker","sequence":"additional","affiliation":[]},{"given":"D.","family":"Hohn","sequence":"additional","affiliation":[]},{"given":"D.","family":"Hohov","sequence":"additional","affiliation":[]},{"given":"T.","family":"Holm","sequence":"additional","affiliation":[]},{"given":"T. R.","family":"Holmes","sequence":"additional","affiliation":[]},{"given":"M.","family":"Holzbock","sequence":"additional","affiliation":[]},{"given":"L. B. A. H.","family":"Hommels","sequence":"additional","affiliation":[]},{"given":"S.","family":"Honda","sequence":"additional","affiliation":[]},{"given":"T. M.","family":"Hong","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Honig","sequence":"additional","affiliation":[]},{"given":"A.","family":"H\u00f6nle","sequence":"additional","affiliation":[]},{"given":"B. H.","family":"Hooberman","sequence":"additional","affiliation":[]},{"given":"W. H.","family":"Hopkins","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Horii","sequence":"additional","affiliation":[]},{"given":"P.","family":"Horn","sequence":"additional","affiliation":[]},{"given":"L. 
A.","family":"Horyn","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hou","sequence":"additional","affiliation":[]},{"given":"A.","family":"Hoummada","sequence":"additional","affiliation":[]},{"given":"J.","family":"Howarth","sequence":"additional","affiliation":[]},{"given":"J.","family":"Hoya","sequence":"additional","affiliation":[]},{"given":"M.","family":"Hrabovsky","sequence":"additional","affiliation":[]},{"given":"J.","family":"Hrdinka","sequence":"additional","affiliation":[]},{"given":"I.","family":"Hristova","sequence":"additional","affiliation":[]},{"given":"J.","family":"Hrivnac","sequence":"additional","affiliation":[]},{"given":"A.","family":"Hrynevich","sequence":"additional","affiliation":[]},{"given":"T.","family":"Hryn\u2019ova","sequence":"additional","affiliation":[]},{"given":"P. J.","family":"Hsu","sequence":"additional","affiliation":[]},{"given":"S.-C.","family":"Hsu","sequence":"additional","affiliation":[]},{"given":"Q.","family":"Hu","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hu","sequence":"additional","affiliation":[]},{"given":"Y. F.","family":"Hu","sequence":"additional","affiliation":[]},{"given":"D. P.","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Huang","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Hubacek","sequence":"additional","affiliation":[]},{"given":"F.","family":"Hubaut","sequence":"additional","affiliation":[]},{"given":"M.","family":"Huebner","sequence":"additional","affiliation":[]},{"given":"F.","family":"Huegging","sequence":"additional","affiliation":[]},{"given":"T. B.","family":"Huffman","sequence":"additional","affiliation":[]},{"given":"M.","family":"Huhtinen","sequence":"additional","affiliation":[]},{"given":"R. F. 
H.","family":"Hunter","sequence":"additional","affiliation":[]},{"given":"P.","family":"Huo","sequence":"additional","affiliation":[]},{"given":"N.","family":"Huseynov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Huston","sequence":"additional","affiliation":[]},{"given":"J.","family":"Huth","sequence":"additional","affiliation":[]},{"given":"R.","family":"Hyneman","sequence":"additional","affiliation":[]},{"given":"S.","family":"Hyrych","sequence":"additional","affiliation":[]},{"given":"G.","family":"Iacobucci","sequence":"additional","affiliation":[]},{"given":"G.","family":"Iakovidis","sequence":"additional","affiliation":[]},{"given":"I.","family":"Ibragimov","sequence":"additional","affiliation":[]},{"given":"L.","family":"Iconomidou-Fayard","sequence":"additional","affiliation":[]},{"given":"P.","family":"Iengo","sequence":"additional","affiliation":[]},{"given":"R.","family":"Ignazzi","sequence":"additional","affiliation":[]},{"given":"O.","family":"Igonkina","sequence":"additional","affiliation":[]},{"given":"R.","family":"Iguchi","sequence":"additional","affiliation":[]},{"given":"T.","family":"Iizawa","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Ikegami","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ikeno","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ilg","sequence":"additional","affiliation":[]},{"given":"D.","family":"Iliadis","sequence":"additional","affiliation":[]},{"given":"N.","family":"Ilic","sequence":"additional","affiliation":[]},{"given":"F.","family":"Iltzsche","sequence":"additional","affiliation":[]},{"given":"G.","family":"Introzzi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Iodice","sequence":"additional","affiliation":[]},{"given":"K.","family":"Iordanidou","sequence":"additional","affiliation":[]},{"given":"V.","family":"Ippolito","sequence":"additional","affiliation":[]},{"given":"M. 
F.","family":"Isacson","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ishino","sequence":"additional","affiliation":[]},{"given":"W.","family":"Islam","sequence":"additional","affiliation":[]},{"given":"C.","family":"Issever","sequence":"additional","affiliation":[]},{"given":"S.","family":"Istin","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ito","sequence":"additional","affiliation":[]},{"given":"J. M.","family":"Iturbe Ponce","sequence":"additional","affiliation":[]},{"given":"R.","family":"Iuppa","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ivina","sequence":"additional","affiliation":[]},{"given":"H.","family":"Iwasaki","sequence":"additional","affiliation":[]},{"given":"J. M.","family":"Izen","sequence":"additional","affiliation":[]},{"given":"V.","family":"Izzo","sequence":"additional","affiliation":[]},{"given":"P.","family":"Jacka","sequence":"additional","affiliation":[]},{"given":"P.","family":"Jackson","sequence":"additional","affiliation":[]},{"given":"R. M.","family":"Jacobs","sequence":"additional","affiliation":[]},{"given":"B. P.","family":"Jaeger","sequence":"additional","affiliation":[]},{"given":"V.","family":"Jain","sequence":"additional","affiliation":[]},{"given":"G.","family":"J\u00e4kel","sequence":"additional","affiliation":[]},{"given":"K. B.","family":"Jakobi","sequence":"additional","affiliation":[]},{"given":"K.","family":"Jakobs","sequence":"additional","affiliation":[]},{"given":"T.","family":"Jakoubek","sequence":"additional","affiliation":[]},{"given":"J.","family":"Jamieson","sequence":"additional","affiliation":[]},{"given":"K. W.","family":"Janas","sequence":"additional","affiliation":[]},{"given":"R.","family":"Jansky","sequence":"additional","affiliation":[]},{"given":"M.","family":"Janus","sequence":"additional","affiliation":[]},{"given":"P. 
A.","family":"Janus","sequence":"additional","affiliation":[]},{"given":"G.","family":"Jarlskog","sequence":"additional","affiliation":[]},{"given":"N.","family":"Javadov","sequence":"additional","affiliation":[]},{"given":"T.","family":"Jav\u016frek","sequence":"additional","affiliation":[]},{"given":"M.","family":"Javurkova","sequence":"additional","affiliation":[]},{"given":"F.","family":"Jeanneau","sequence":"additional","affiliation":[]},{"given":"L.","family":"Jeanty","sequence":"additional","affiliation":[]},{"given":"J.","family":"Jejelava","sequence":"additional","affiliation":[]},{"given":"A.","family":"Jelinskas","sequence":"additional","affiliation":[]},{"given":"P.","family":"Jenni","sequence":"additional","affiliation":[]},{"given":"N.","family":"Jeong","sequence":"additional","affiliation":[]},{"given":"S.","family":"J\u00e9z\u00e9quel","sequence":"additional","affiliation":[]},{"given":"H.","family":"Ji","sequence":"additional","affiliation":[]},{"given":"J.","family":"Jia","sequence":"additional","affiliation":[]},{"given":"H.","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Jiang","sequence":"additional","affiliation":[]},{"given":"S.","family":"Jiggins","sequence":"additional","affiliation":[]},{"given":"F. A.","family":"Jimenez Morales","sequence":"additional","affiliation":[]},{"given":"J.","family":"Jimenez Pena","sequence":"additional","affiliation":[]},{"given":"S.","family":"Jin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Jinaru","sequence":"additional","affiliation":[]},{"given":"O.","family":"Jinnouchi","sequence":"additional","affiliation":[]},{"given":"H.","family":"Jivan","sequence":"additional","affiliation":[]},{"given":"P.","family":"Johansson","sequence":"additional","affiliation":[]},{"given":"K. A.","family":"Johns","sequence":"additional","affiliation":[]},{"given":"C. 
A.","family":"Johnson","sequence":"additional","affiliation":[]},{"given":"R. W. L.","family":"Jones","sequence":"additional","affiliation":[]},{"given":"S. D.","family":"Jones","sequence":"additional","affiliation":[]},{"given":"S.","family":"Jones","sequence":"additional","affiliation":[]},{"given":"T. J.","family":"Jones","sequence":"additional","affiliation":[]},{"given":"J.","family":"Jongmanns","sequence":"additional","affiliation":[]},{"given":"P. M.","family":"Jorge","sequence":"additional","affiliation":[]},{"given":"J.","family":"Jovicevic","sequence":"additional","affiliation":[]},{"given":"X.","family":"Ju","sequence":"additional","affiliation":[]},{"given":"J. J.","family":"Junggeburth","sequence":"additional","affiliation":[]},{"given":"A.","family":"Juste Rozas","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kaczmarska","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kado","sequence":"additional","affiliation":[]},{"given":"H.","family":"Kagan","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kagan","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kahn","sequence":"additional","affiliation":[]},{"given":"C.","family":"Kahra","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kaji","sequence":"additional","affiliation":[]},{"given":"E.","family":"Kajomovitz","sequence":"additional","affiliation":[]},{"given":"C. W.","family":"Kalderon","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kaluza","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kamenshchikov","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kaneda","sequence":"additional","affiliation":[]},{"given":"N. 
J.","family":"Kang","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kang","sequence":"additional","affiliation":[]},{"given":"L.","family":"Kanjir","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Kano","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kanzaki","sequence":"additional","affiliation":[]},{"given":"L. S.","family":"Kaplan","sequence":"additional","affiliation":[]},{"given":"D.","family":"Kar","sequence":"additional","affiliation":[]},{"given":"K.","family":"Karava","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Kareem","sequence":"additional","affiliation":[]},{"given":"S. N.","family":"Karpov","sequence":"additional","affiliation":[]},{"given":"Z. M.","family":"Karpova","sequence":"additional","affiliation":[]},{"given":"V.","family":"Kartvelishvili","sequence":"additional","affiliation":[]},{"given":"A. N.","family":"Karyukhin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kastanas","sequence":"additional","affiliation":[]},{"given":"C.","family":"Kato","sequence":"additional","affiliation":[]},{"given":"J.","family":"Katzy","sequence":"additional","affiliation":[]},{"given":"K.","family":"Kawade","sequence":"additional","affiliation":[]},{"given":"K.","family":"Kawagoe","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kawaguchi","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kawamoto","sequence":"additional","affiliation":[]},{"given":"G.","family":"Kawamura","sequence":"additional","affiliation":[]},{"given":"E. F.","family":"Kay","sequence":"additional","affiliation":[]},{"given":"V. F.","family":"Kazanin","sequence":"additional","affiliation":[]},{"given":"R.","family":"Keeler","sequence":"additional","affiliation":[]},{"given":"R.","family":"Kehoe","sequence":"additional","affiliation":[]},{"given":"J. 
S.","family":"Keller","sequence":"additional","affiliation":[]},{"given":"E.","family":"Kellermann","sequence":"additional","affiliation":[]},{"given":"D.","family":"Kelsey","sequence":"additional","affiliation":[]},{"given":"J. J.","family":"Kempster","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kendrick","sequence":"additional","affiliation":[]},{"given":"K. E.","family":"Kennedy","sequence":"additional","affiliation":[]},{"given":"O.","family":"Kepka","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kersten","sequence":"additional","affiliation":[]},{"given":"B. P.","family":"Ker\u0161evan","sequence":"additional","affiliation":[]},{"given":"S.","family":"Ketabchi Haghighat","sequence":"additional","affiliation":[]},{"given":"M.","family":"Khader","sequence":"additional","affiliation":[]},{"given":"F.","family":"Khalil-Zada","sequence":"additional","affiliation":[]},{"given":"M.","family":"Khandoga","sequence":"additional","affiliation":[]},{"given":"A.","family":"Khanov","sequence":"additional","affiliation":[]},{"given":"A. G.","family":"Kharlamov","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kharlamova","sequence":"additional","affiliation":[]},{"given":"E. E.","family":"Khoda","sequence":"additional","affiliation":[]},{"given":"A.","family":"Khodinov","sequence":"additional","affiliation":[]},{"given":"T. J.","family":"Khoo","sequence":"additional","affiliation":[]},{"given":"E.","family":"Khramov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Khubua","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kido","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kiehn","sequence":"additional","affiliation":[]},{"given":"C. R.","family":"Kilby","sequence":"additional","affiliation":[]},{"given":"E.","family":"Kim","sequence":"additional","affiliation":[]},{"given":"Y. 
K.","family":"Kim","sequence":"additional","affiliation":[]},{"given":"N.","family":"Kimura","sequence":"additional","affiliation":[]},{"given":"O. M.","family":"Kind","sequence":"additional","affiliation":[]},{"given":"B. T.","family":"King","sequence":"additional","affiliation":[]},{"given":"D.","family":"Kirchmeier","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kirk","sequence":"additional","affiliation":[]},{"given":"A. E.","family":"Kiryunin","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kishimoto","sequence":"additional","affiliation":[]},{"given":"D. P.","family":"Kisliuk","sequence":"additional","affiliation":[]},{"given":"V.","family":"Kitali","sequence":"additional","affiliation":[]},{"given":"O.","family":"Kivernyk","sequence":"additional","affiliation":[]},{"given":"T.","family":"Klapdor-Kleingrothaus","sequence":"additional","affiliation":[]},{"given":"M.","family":"Klassen","sequence":"additional","affiliation":[]},{"given":"C.","family":"Klein","sequence":"additional","affiliation":[]},{"given":"M. H.","family":"Klein","sequence":"additional","affiliation":[]},{"given":"M.","family":"Klein","sequence":"additional","affiliation":[]},{"given":"U.","family":"Klein","sequence":"additional","affiliation":[]},{"given":"K.","family":"Kleinknecht","sequence":"additional","affiliation":[]},{"given":"P.","family":"Klimek","sequence":"additional","affiliation":[]},{"given":"A.","family":"Klimentov","sequence":"additional","affiliation":[]},{"given":"T.","family":"Klingl","sequence":"additional","affiliation":[]},{"given":"T.","family":"Klioutchnikova","sequence":"additional","affiliation":[]},{"given":"F. F.","family":"Klitzner","sequence":"additional","affiliation":[]},{"given":"P.","family":"Kluit","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kluth","sequence":"additional","affiliation":[]},{"given":"E.","family":"Kneringer","sequence":"additional","affiliation":[]},{"given":"E. B. F. 
G.","family":"Knoops","sequence":"additional","affiliation":[]},{"given":"A.","family":"Knue","sequence":"additional","affiliation":[]},{"given":"D.","family":"Kobayashi","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kobayashi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kobel","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kocian","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kodama","sequence":"additional","affiliation":[]},{"given":"P.","family":"Kodys","sequence":"additional","affiliation":[]},{"given":"P. T.","family":"Koenig","sequence":"additional","affiliation":[]},{"given":"T.","family":"Koffas","sequence":"additional","affiliation":[]},{"given":"N. M.","family":"K\u00f6hler","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kolb","sequence":"additional","affiliation":[]},{"given":"I.","family":"Koletsou","sequence":"additional","affiliation":[]},{"given":"T.","family":"Komarek","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kondo","sequence":"additional","affiliation":[]},{"given":"K.","family":"K\u00f6neke","sequence":"additional","affiliation":[]},{"given":"A. X. Y.","family":"Kong","sequence":"additional","affiliation":[]},{"given":"A. 
C.","family":"K\u00f6nig","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kono","sequence":"additional","affiliation":[]},{"given":"V.","family":"Konstantinides","sequence":"additional","affiliation":[]},{"given":"N.","family":"Konstantinidis","sequence":"additional","affiliation":[]},{"given":"B.","family":"Konya","sequence":"additional","affiliation":[]},{"given":"R.","family":"Kopeliansky","sequence":"additional","affiliation":[]},{"given":"S.","family":"Koperny","sequence":"additional","affiliation":[]},{"given":"K.","family":"Korcyl","sequence":"additional","affiliation":[]},{"given":"K.","family":"Kordas","sequence":"additional","affiliation":[]},{"given":"G.","family":"Koren","sequence":"additional","affiliation":[]},{"given":"A.","family":"Korn","sequence":"additional","affiliation":[]},{"given":"I.","family":"Korolkov","sequence":"additional","affiliation":[]},{"given":"E. V.","family":"Korolkova","sequence":"additional","affiliation":[]},{"given":"N.","family":"Korotkova","sequence":"additional","affiliation":[]},{"given":"O.","family":"Kortner","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kortner","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kosek","sequence":"additional","affiliation":[]},{"given":"V. V.","family":"Kostyukhin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kotsokechagia","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kotwal","sequence":"additional","affiliation":[]},{"given":"A.","family":"Koulouris","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kourkoumeli-Charalampidi","sequence":"additional","affiliation":[]},{"given":"C.","family":"Kourkoumelis","sequence":"additional","affiliation":[]},{"given":"E.","family":"Kourlitis","sequence":"additional","affiliation":[]},{"given":"V.","family":"Kouskoura","sequence":"additional","affiliation":[]},{"given":"A. 
B.","family":"Kowalewska","sequence":"additional","affiliation":[]},{"given":"R.","family":"Kowalewski","sequence":"additional","affiliation":[]},{"given":"W.","family":"Kozanecki","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"Kozhin","sequence":"additional","affiliation":[]},{"given":"V. A.","family":"Kramarenko","sequence":"additional","affiliation":[]},{"given":"G.","family":"Kramberger","sequence":"additional","affiliation":[]},{"given":"D.","family":"Krasnopevtsev","sequence":"additional","affiliation":[]},{"given":"M. W.","family":"Krasny","sequence":"additional","affiliation":[]},{"given":"A.","family":"Krasznahorkay","sequence":"additional","affiliation":[]},{"given":"D.","family":"Krauss","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Kremer","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kretzschmar","sequence":"additional","affiliation":[]},{"given":"P.","family":"Krieger","sequence":"additional","affiliation":[]},{"given":"F.","family":"Krieter","sequence":"additional","affiliation":[]},{"given":"A.","family":"Krishnan","sequence":"additional","affiliation":[]},{"given":"K.","family":"Krizka","sequence":"additional","affiliation":[]},{"given":"K.","family":"Kroeninger","sequence":"additional","affiliation":[]},{"given":"H.","family":"Kroha","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kroll","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kroll","sequence":"additional","affiliation":[]},{"given":"K. S.","family":"Krowpman","sequence":"additional","affiliation":[]},{"given":"U.","family":"Kruchonak","sequence":"additional","affiliation":[]},{"given":"H.","family":"Kr\u00fcger","sequence":"additional","affiliation":[]},{"given":"N.","family":"Krumnack","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Kruse","sequence":"additional","affiliation":[]},{"given":"J. 
A.","family":"Krzysiak","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kubota","sequence":"additional","affiliation":[]},{"given":"O.","family":"Kuchinskaia","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kuday","sequence":"additional","affiliation":[]},{"given":"D.","family":"Kuechler","sequence":"additional","affiliation":[]},{"given":"J. T.","family":"Kuechler","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kuehn","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kugel","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kuhl","sequence":"additional","affiliation":[]},{"given":"V.","family":"Kukhtin","sequence":"additional","affiliation":[]},{"given":"R.","family":"Kukla","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Kulchitsky","sequence":"additional","affiliation":[]},{"given":"S.","family":"Kuleshov","sequence":"additional","affiliation":[]},{"given":"Y. P.","family":"Kulinich","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kuna","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kunigo","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kupco","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kupfer","sequence":"additional","affiliation":[]},{"given":"O.","family":"Kuprash","sequence":"additional","affiliation":[]},{"given":"H.","family":"Kurashige","sequence":"additional","affiliation":[]},{"given":"L. L.","family":"Kurchaninov","sequence":"additional","affiliation":[]},{"given":"Y. A.","family":"Kurochkin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Kurova","sequence":"additional","affiliation":[]},{"given":"M. G.","family":"Kurth","sequence":"additional","affiliation":[]},{"given":"E. S.","family":"Kuwertz","sequence":"additional","affiliation":[]},{"given":"M.","family":"Kuze","sequence":"additional","affiliation":[]},{"given":"A. 
K.","family":"Kvam","sequence":"additional","affiliation":[]},{"given":"J.","family":"Kvita","sequence":"additional","affiliation":[]},{"given":"T.","family":"Kwan","sequence":"additional","affiliation":[]},{"given":"L.","family":"La Rotonda","sequence":"additional","affiliation":[]},{"given":"F.","family":"La Ruffa","sequence":"additional","affiliation":[]},{"given":"C.","family":"Lacasta","sequence":"additional","affiliation":[]},{"given":"F.","family":"Lacava","sequence":"additional","affiliation":[]},{"given":"D. P. J.","family":"Lack","sequence":"additional","affiliation":[]},{"given":"H.","family":"Lacker","sequence":"additional","affiliation":[]},{"given":"D.","family":"Lacour","sequence":"additional","affiliation":[]},{"given":"E.","family":"Ladygin","sequence":"additional","affiliation":[]},{"given":"R.","family":"Lafaye","sequence":"additional","affiliation":[]},{"given":"B.","family":"Laforge","sequence":"additional","affiliation":[]},{"given":"T.","family":"Lagouri","sequence":"additional","affiliation":[]},{"given":"S.","family":"Lai","sequence":"additional","affiliation":[]},{"given":"I. K.","family":"Lakomiec","sequence":"additional","affiliation":[]},{"given":"S.","family":"Lammers","sequence":"additional","affiliation":[]},{"given":"W.","family":"Lampl","sequence":"additional","affiliation":[]},{"given":"C.","family":"Lampoudis","sequence":"additional","affiliation":[]},{"given":"E.","family":"Lan\u00e7on","sequence":"additional","affiliation":[]},{"given":"U.","family":"Landgraf","sequence":"additional","affiliation":[]},{"given":"M. P. J.","family":"Landon","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Lanfermann","sequence":"additional","affiliation":[]},{"given":"V. S.","family":"Lang","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Lange","sequence":"additional","affiliation":[]},{"given":"R. J.","family":"Langenberg","sequence":"additional","affiliation":[]},{"given":"A. 
J.","family":"Lankford","sequence":"additional","affiliation":[]},{"given":"F.","family":"Lanni","sequence":"additional","affiliation":[]},{"given":"K.","family":"Lantzsch","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lanza","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lapertosa","sequence":"additional","affiliation":[]},{"given":"S.","family":"Laplace","sequence":"additional","affiliation":[]},{"given":"J. F.","family":"Laporte","sequence":"additional","affiliation":[]},{"given":"T.","family":"Lari","sequence":"additional","affiliation":[]},{"given":"F.","family":"Lasagni Manghi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Lassnig","sequence":"additional","affiliation":[]},{"given":"T. S.","family":"Lau","sequence":"additional","affiliation":[]},{"given":"A.","family":"Laudrain","sequence":"additional","affiliation":[]},{"given":"A.","family":"Laurier","sequence":"additional","affiliation":[]},{"given":"M.","family":"Lavorgna","sequence":"additional","affiliation":[]},{"given":"S. D.","family":"Lawlor","sequence":"additional","affiliation":[]},{"given":"M.","family":"Lazzaroni","sequence":"additional","affiliation":[]},{"given":"B.","family":"Le","sequence":"additional","affiliation":[]},{"given":"E.","family":"Le Guirriec","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lebedev","sequence":"additional","affiliation":[]},{"given":"M.","family":"LeBlanc","sequence":"additional","affiliation":[]},{"given":"T.","family":"LeCompte","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ledroit-Guillon","sequence":"additional","affiliation":[]},{"given":"A. C. A.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"C. A.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"G. R.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"L.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"S. 
C.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"S.","family":"Lee","sequence":"additional","affiliation":[]},{"given":"B.","family":"Lefebvre","sequence":"additional","affiliation":[]},{"given":"H. P.","family":"Lefebvre","sequence":"additional","affiliation":[]},{"given":"M.","family":"Lefebvre","sequence":"additional","affiliation":[]},{"given":"C.","family":"Leggett","sequence":"additional","affiliation":[]},{"given":"K.","family":"Lehmann","sequence":"additional","affiliation":[]},{"given":"N.","family":"Lehmann","sequence":"additional","affiliation":[]},{"given":"G.","family":"Lehmann Miotto","sequence":"additional","affiliation":[]},{"given":"W. A.","family":"Leight","sequence":"additional","affiliation":[]},{"given":"A.","family":"Leisos","sequence":"additional","affiliation":[]},{"given":"M. A. L.","family":"Leite","sequence":"additional","affiliation":[]},{"given":"C. E.","family":"Leitgeb","sequence":"additional","affiliation":[]},{"given":"R.","family":"Leitner","sequence":"additional","affiliation":[]},{"given":"D.","family":"Lellouch","sequence":"additional","affiliation":[]},{"given":"K. J. C.","family":"Leney","sequence":"additional","affiliation":[]},{"given":"T.","family":"Lenz","sequence":"additional","affiliation":[]},{"given":"R.","family":"Leone","sequence":"additional","affiliation":[]},{"given":"S.","family":"Leone","sequence":"additional","affiliation":[]},{"given":"C.","family":"Leonidopoulos","sequence":"additional","affiliation":[]},{"given":"A.","family":"Leopold","sequence":"additional","affiliation":[]},{"given":"C.","family":"Leroy","sequence":"additional","affiliation":[]},{"given":"R.","family":"Les","sequence":"additional","affiliation":[]},{"given":"C. 
G.","family":"Lester","sequence":"additional","affiliation":[]},{"given":"M.","family":"Levchenko","sequence":"additional","affiliation":[]},{"given":"J.","family":"Lev\u00eaque","sequence":"additional","affiliation":[]},{"given":"D.","family":"Levin","sequence":"additional","affiliation":[]},{"given":"L. J.","family":"Levinson","sequence":"additional","affiliation":[]},{"given":"D. J.","family":"Lewis","sequence":"additional","affiliation":[]},{"given":"B.","family":"Li","sequence":"additional","affiliation":[]},{"given":"B.","family":"Li","sequence":"additional","affiliation":[]},{"given":"C-Q.","family":"Li","sequence":"additional","affiliation":[]},{"given":"F.","family":"Li","sequence":"additional","affiliation":[]},{"given":"H.","family":"Li","sequence":"additional","affiliation":[]},{"given":"H.","family":"Li","sequence":"additional","affiliation":[]},{"given":"J.","family":"Li","sequence":"additional","affiliation":[]},{"given":"K.","family":"Li","sequence":"additional","affiliation":[]},{"given":"L.","family":"Li","sequence":"additional","affiliation":[]},{"given":"M.","family":"Li","sequence":"additional","affiliation":[]},{"given":"Q.","family":"Li","sequence":"additional","affiliation":[]},{"given":"Q. 
Y.","family":"Li","sequence":"additional","affiliation":[]},{"given":"S.","family":"Li","sequence":"additional","affiliation":[]},{"given":"X.","family":"Li","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Li","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Li","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Li","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Liang","sequence":"additional","affiliation":[]},{"given":"B.","family":"Liberti","sequence":"additional","affiliation":[]},{"given":"A.","family":"Liblong","sequence":"additional","affiliation":[]},{"given":"K.","family":"Lie","sequence":"additional","affiliation":[]},{"given":"S.","family":"Lim","sequence":"additional","affiliation":[]},{"given":"C. Y.","family":"Lin","sequence":"additional","affiliation":[]},{"given":"K.","family":"Lin","sequence":"additional","affiliation":[]},{"given":"T. H.","family":"Lin","sequence":"additional","affiliation":[]},{"given":"R. A.","family":"Linck","sequence":"additional","affiliation":[]},{"given":"J. H.","family":"Lindon","sequence":"additional","affiliation":[]},{"given":"A. L.","family":"Lionti","sequence":"additional","affiliation":[]},{"given":"E.","family":"Lipeles","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lipniacka","sequence":"additional","affiliation":[]},{"given":"T. M.","family":"Liss","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lister","sequence":"additional","affiliation":[]},{"given":"J. D.","family":"Little","sequence":"additional","affiliation":[]},{"given":"B.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"B. X.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"H. B.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"H.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"J. B.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"J. K. 
K.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"K.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"M.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"M. Y.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"P.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Y. L.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"Y. W.","family":"Liu","sequence":"additional","affiliation":[]},{"given":"M.","family":"Livan","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lleres","sequence":"additional","affiliation":[]},{"given":"J.","family":"Llorente Merino","sequence":"additional","affiliation":[]},{"given":"S. L.","family":"Lloyd","sequence":"additional","affiliation":[]},{"given":"C. Y.","family":"Lo","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Lobodzinska","sequence":"additional","affiliation":[]},{"given":"P.","family":"Loch","sequence":"additional","affiliation":[]},{"given":"S.","family":"Loffredo","sequence":"additional","affiliation":[]},{"given":"T.","family":"Lohse","sequence":"additional","affiliation":[]},{"given":"K.","family":"Lohwasser","sequence":"additional","affiliation":[]},{"given":"M.","family":"Lokajicek","sequence":"additional","affiliation":[]},{"given":"J. D.","family":"Long","sequence":"additional","affiliation":[]},{"given":"R. E.","family":"Long","sequence":"additional","affiliation":[]},{"given":"L.","family":"Longo","sequence":"additional","affiliation":[]},{"given":"K. A.","family":"Looper","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Lopez","sequence":"additional","affiliation":[]},{"given":"I. 
Lopez","family":"Paz","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lopez Solis","sequence":"additional","affiliation":[]},{"given":"J.","family":"Lorenz","sequence":"additional","affiliation":[]},{"given":"N.","family":"Lorenzo Martinez","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Lory","sequence":"additional","affiliation":[]},{"given":"M.","family":"Losada","sequence":"additional","affiliation":[]},{"given":"P. J.","family":"L\u00f6sel","sequence":"additional","affiliation":[]},{"given":"A.","family":"L\u00f6sle","sequence":"additional","affiliation":[]},{"given":"X.","family":"Lou","sequence":"additional","affiliation":[]},{"given":"X.","family":"Lou","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lounis","sequence":"additional","affiliation":[]},{"given":"J.","family":"Love","sequence":"additional","affiliation":[]},{"given":"P. A.","family":"Love","sequence":"additional","affiliation":[]},{"given":"J. J.","family":"Lozano Bahilo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Lu","sequence":"additional","affiliation":[]},{"given":"Y. J.","family":"Lu","sequence":"additional","affiliation":[]},{"given":"H. J.","family":"Lubatti","sequence":"additional","affiliation":[]},{"given":"C.","family":"Luci","sequence":"additional","affiliation":[]},{"given":"A.","family":"Lucotte","sequence":"additional","affiliation":[]},{"given":"C.","family":"Luedtke","sequence":"additional","affiliation":[]},{"given":"F.","family":"Luehring","sequence":"additional","affiliation":[]},{"given":"I.","family":"Luise","sequence":"additional","affiliation":[]},{"given":"L.","family":"Luminari","sequence":"additional","affiliation":[]},{"given":"B.","family":"Lund-Jensen","sequence":"additional","affiliation":[]},{"given":"M. 
S.","family":"Lutz","sequence":"additional","affiliation":[]},{"given":"D.","family":"Lynn","sequence":"additional","affiliation":[]},{"given":"H.","family":"Lyons","sequence":"additional","affiliation":[]},{"given":"R.","family":"Lysak","sequence":"additional","affiliation":[]},{"given":"E.","family":"Lytken","sequence":"additional","affiliation":[]},{"given":"F.","family":"Lyu","sequence":"additional","affiliation":[]},{"given":"V.","family":"Lyubushkin","sequence":"additional","affiliation":[]},{"given":"T.","family":"Lyubushkina","sequence":"additional","affiliation":[]},{"given":"H.","family":"Ma","sequence":"additional","affiliation":[]},{"given":"L. L.","family":"Ma","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Ma","sequence":"additional","affiliation":[]},{"given":"G.","family":"Maccarrone","sequence":"additional","affiliation":[]},{"given":"A.","family":"Macchiolo","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Macdonald","sequence":"additional","affiliation":[]},{"given":"J.","family":"Machado Miguens","sequence":"additional","affiliation":[]},{"given":"D.","family":"Madaffari","sequence":"additional","affiliation":[]},{"given":"R.","family":"Madar","sequence":"additional","affiliation":[]},{"given":"W. F.","family":"Mader","sequence":"additional","affiliation":[]},{"given":"M.","family":"Madugoda Ralalage Don","sequence":"additional","affiliation":[]},{"given":"N.","family":"Madysa","sequence":"additional","affiliation":[]},{"given":"J.","family":"Maeda","sequence":"additional","affiliation":[]},{"given":"T.","family":"Maeno","sequence":"additional","affiliation":[]},{"given":"M.","family":"Maerker","sequence":"additional","affiliation":[]},{"given":"V.","family":"Magerl","sequence":"additional","affiliation":[]},{"given":"N.","family":"Magini","sequence":"additional","affiliation":[]},{"given":"J.","family":"Magro","sequence":"additional","affiliation":[]},{"given":"D. 
J.","family":"Mahon","sequence":"additional","affiliation":[]},{"given":"C.","family":"Maidantchik","sequence":"additional","affiliation":[]},{"given":"T.","family":"Maier","sequence":"additional","affiliation":[]},{"given":"A.","family":"Maio","sequence":"additional","affiliation":[]},{"given":"K.","family":"Maj","sequence":"additional","affiliation":[]},{"given":"O.","family":"Majersky","sequence":"additional","affiliation":[]},{"given":"S.","family":"Majewski","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Makida","sequence":"additional","affiliation":[]},{"given":"N.","family":"Makovec","sequence":"additional","affiliation":[]},{"given":"B.","family":"Malaescu","sequence":"additional","affiliation":[]},{"given":"Pa.","family":"Malecki","sequence":"additional","affiliation":[]},{"given":"V. P.","family":"Maleev","sequence":"additional","affiliation":[]},{"given":"F.","family":"Malek","sequence":"additional","affiliation":[]},{"given":"U.","family":"Mallik","sequence":"additional","affiliation":[]},{"given":"D.","family":"Malon","sequence":"additional","affiliation":[]},{"given":"C.","family":"Malone","sequence":"additional","affiliation":[]},{"given":"S.","family":"Maltezos","sequence":"additional","affiliation":[]},{"given":"S.","family":"Malyukov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Mamuzic","sequence":"additional","affiliation":[]},{"given":"G.","family":"Mancini","sequence":"additional","affiliation":[]},{"given":"I.","family":"Mandi\u0107","sequence":"additional","affiliation":[]},{"given":"L.","family":"Manhaes de Andrade Filho","sequence":"additional","affiliation":[]},{"given":"I. M.","family":"Maniatis","sequence":"additional","affiliation":[]},{"given":"J.","family":"Manjarres Ramos","sequence":"additional","affiliation":[]},{"given":"K. 
H.","family":"Mankinen","sequence":"additional","affiliation":[]},{"given":"A.","family":"Mann","sequence":"additional","affiliation":[]},{"given":"A.","family":"Manousos","sequence":"additional","affiliation":[]},{"given":"B.","family":"Mansoulie","sequence":"additional","affiliation":[]},{"given":"I.","family":"Manthos","sequence":"additional","affiliation":[]},{"given":"S.","family":"Manzoni","sequence":"additional","affiliation":[]},{"given":"A.","family":"Marantis","sequence":"additional","affiliation":[]},{"given":"G.","family":"Marceca","sequence":"additional","affiliation":[]},{"given":"L.","family":"Marchese","sequence":"additional","affiliation":[]},{"given":"G.","family":"Marchiori","sequence":"additional","affiliation":[]},{"given":"M.","family":"Marcisovsky","sequence":"additional","affiliation":[]},{"given":"L.","family":"Marcoccia","sequence":"additional","affiliation":[]},{"given":"C.","family":"Marcon","sequence":"additional","affiliation":[]},{"given":"C. A.","family":"Marin Tobon","sequence":"additional","affiliation":[]},{"given":"M.","family":"Marjanovic","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Marshall","sequence":"additional","affiliation":[]},{"given":"M. U. F.","family":"Martensson","sequence":"additional","affiliation":[]},{"given":"S.","family":"Marti-Garcia","sequence":"additional","affiliation":[]},{"given":"C. B.","family":"Martin","sequence":"additional","affiliation":[]},{"given":"T. A.","family":"Martin","sequence":"additional","affiliation":[]},{"given":"V. J.","family":"Martin","sequence":"additional","affiliation":[]},{"given":"B.","family":"Martin dit Latour","sequence":"additional","affiliation":[]},{"given":"L.","family":"Martinelli","sequence":"additional","affiliation":[]},{"given":"M.","family":"Martinez","sequence":"additional","affiliation":[]},{"given":"V. 
I.","family":"Martinez Outschoorn","sequence":"additional","affiliation":[]},{"given":"S.","family":"Martin-Haugh","sequence":"additional","affiliation":[]},{"given":"V. S.","family":"Martoiu","sequence":"additional","affiliation":[]},{"given":"A. C.","family":"Martyniuk","sequence":"additional","affiliation":[]},{"given":"A.","family":"Marzin","sequence":"additional","affiliation":[]},{"given":"S. R.","family":"Maschek","sequence":"additional","affiliation":[]},{"given":"L.","family":"Masetti","sequence":"additional","affiliation":[]},{"given":"T.","family":"Mashimo","sequence":"additional","affiliation":[]},{"given":"R.","family":"Mashinistov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Masik","sequence":"additional","affiliation":[]},{"given":"A. L.","family":"Maslennikov","sequence":"additional","affiliation":[]},{"given":"L.","family":"Massa","sequence":"additional","affiliation":[]},{"given":"P.","family":"Massarotti","sequence":"additional","affiliation":[]},{"given":"P.","family":"Mastrandrea","sequence":"additional","affiliation":[]},{"given":"A.","family":"Mastroberardino","sequence":"additional","affiliation":[]},{"given":"T.","family":"Masubuchi","sequence":"additional","affiliation":[]},{"given":"D.","family":"Matakias","sequence":"additional","affiliation":[]},{"given":"A.","family":"Matic","sequence":"additional","affiliation":[]},{"given":"N.","family":"Matsuzawa","sequence":"additional","affiliation":[]},{"given":"P.","family":"M\u00e4ttig","sequence":"additional","affiliation":[]},{"given":"J.","family":"Maurer","sequence":"additional","affiliation":[]},{"given":"B.","family":"Ma\u010dek","sequence":"additional","affiliation":[]},{"given":"D. A.","family":"Maximov","sequence":"additional","affiliation":[]},{"given":"R.","family":"Mazini","sequence":"additional","affiliation":[]},{"given":"I.","family":"Maznas","sequence":"additional","affiliation":[]},{"given":"S. 
M.","family":"Mazza","sequence":"additional","affiliation":[]},{"given":"S. P.","family":"Mc Kee","sequence":"additional","affiliation":[]},{"given":"T. G.","family":"McCarthy","sequence":"additional","affiliation":[]},{"given":"W. P.","family":"McCormack","sequence":"additional","affiliation":[]},{"given":"E. F.","family":"McDonald","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Mcfayden","sequence":"additional","affiliation":[]},{"given":"G.","family":"Mchedlidze","sequence":"additional","affiliation":[]},{"given":"M. A.","family":"McKay","sequence":"additional","affiliation":[]},{"given":"K. D.","family":"McLean","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"McMahon","sequence":"additional","affiliation":[]},{"given":"P. C.","family":"McNamara","sequence":"additional","affiliation":[]},{"given":"C. J.","family":"McNicol","sequence":"additional","affiliation":[]},{"given":"R. A.","family":"McPherson","sequence":"additional","affiliation":[]},{"given":"J. E.","family":"Mdhluli","sequence":"additional","affiliation":[]},{"given":"Z. A.","family":"Meadows","sequence":"additional","affiliation":[]},{"given":"S.","family":"Meehan","sequence":"additional","affiliation":[]},{"given":"T.","family":"Megy","sequence":"additional","affiliation":[]},{"given":"S.","family":"Mehlhase","sequence":"additional","affiliation":[]},{"given":"A.","family":"Mehta","sequence":"additional","affiliation":[]},{"given":"T.","family":"Meideck","sequence":"additional","affiliation":[]},{"given":"B.","family":"Meirose","sequence":"additional","affiliation":[]},{"given":"D.","family":"Melini","sequence":"additional","affiliation":[]},{"given":"B. R.","family":"Mellado Garcia","sequence":"additional","affiliation":[]},{"given":"J. 
D.","family":"Mellenthin","sequence":"additional","affiliation":[]},{"given":"M.","family":"Melo","sequence":"additional","affiliation":[]},{"given":"F.","family":"Meloni","sequence":"additional","affiliation":[]},{"given":"A.","family":"Melzer","sequence":"additional","affiliation":[]},{"given":"S. B.","family":"Menary","sequence":"additional","affiliation":[]},{"given":"E. D.","family":"Mendes Gouveia","sequence":"additional","affiliation":[]},{"given":"L.","family":"Meng","sequence":"additional","affiliation":[]},{"given":"X. T.","family":"Meng","sequence":"additional","affiliation":[]},{"given":"S.","family":"Menke","sequence":"additional","affiliation":[]},{"given":"E.","family":"Meoni","sequence":"additional","affiliation":[]},{"given":"S.","family":"Mergelmeyer","sequence":"additional","affiliation":[]},{"given":"S. A. M.","family":"Merkt","sequence":"additional","affiliation":[]},{"given":"C.","family":"Merlassino","sequence":"additional","affiliation":[]},{"given":"P.","family":"Mermod","sequence":"additional","affiliation":[]},{"given":"L.","family":"Merola","sequence":"additional","affiliation":[]},{"given":"C.","family":"Meroni","sequence":"additional","affiliation":[]},{"given":"G.","family":"Merz","sequence":"additional","affiliation":[]},{"given":"O.","family":"Meshkov","sequence":"additional","affiliation":[]},{"given":"J. K. R.","family":"Meshreki","sequence":"additional","affiliation":[]},{"given":"A.","family":"Messina","sequence":"additional","affiliation":[]},{"given":"J.","family":"Metcalfe","sequence":"additional","affiliation":[]},{"given":"A. 
S.","family":"Mete","sequence":"additional","affiliation":[]},{"given":"C.","family":"Meyer","sequence":"additional","affiliation":[]},{"given":"J-P.","family":"Meyer","sequence":"additional","affiliation":[]},{"given":"H.","family":"Meyer Zu Theenhausen","sequence":"additional","affiliation":[]},{"given":"F.","family":"Miano","sequence":"additional","affiliation":[]},{"given":"M.","family":"Michetti","sequence":"additional","affiliation":[]},{"given":"R. P.","family":"Middleton","sequence":"additional","affiliation":[]},{"given":"L.","family":"Mijovi\u0107","sequence":"additional","affiliation":[]},{"given":"G.","family":"Mikenberg","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mikestikova","sequence":"additional","affiliation":[]},{"given":"M.","family":"Miku\u017e","sequence":"additional","affiliation":[]},{"given":"H.","family":"Mildner","sequence":"additional","affiliation":[]},{"given":"M.","family":"Milesi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Milic","sequence":"additional","affiliation":[]},{"given":"C. D.","family":"Milke","sequence":"additional","affiliation":[]},{"given":"D. A.","family":"Millar","sequence":"additional","affiliation":[]},{"given":"D. W.","family":"Miller","sequence":"additional","affiliation":[]},{"given":"A.","family":"Milov","sequence":"additional","affiliation":[]},{"given":"D. A.","family":"Milstead","sequence":"additional","affiliation":[]},{"given":"R. A.","family":"Mina","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Minaenko","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mi\u00f1ano Moya","sequence":"additional","affiliation":[]},{"given":"I. A.","family":"Minashvili","sequence":"additional","affiliation":[]},{"given":"A. 
I.","family":"Mincer","sequence":"additional","affiliation":[]},{"given":"B.","family":"Mindur","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mineev","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Minegishi","sequence":"additional","affiliation":[]},{"given":"L. M.","family":"Mir","sequence":"additional","affiliation":[]},{"given":"A.","family":"Mirto","sequence":"additional","affiliation":[]},{"given":"K. P.","family":"Mistry","sequence":"additional","affiliation":[]},{"given":"T.","family":"Mitani","sequence":"additional","affiliation":[]},{"given":"J.","family":"Mitrevski","sequence":"additional","affiliation":[]},{"given":"V. A.","family":"Mitsou","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mittal","sequence":"additional","affiliation":[]},{"given":"O.","family":"Miu","sequence":"additional","affiliation":[]},{"given":"A.","family":"Miucci","sequence":"additional","affiliation":[]},{"given":"P. S.","family":"Miyagawa","sequence":"additional","affiliation":[]},{"given":"A.","family":"Mizukami","sequence":"additional","affiliation":[]},{"given":"J. U.","family":"Mj\u00f6rnmark","sequence":"additional","affiliation":[]},{"given":"T.","family":"Mkrtchyan","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mlynarikova","sequence":"additional","affiliation":[]},{"given":"T.","family":"Moa","sequence":"additional","affiliation":[]},{"given":"K.","family":"Mochizuki","sequence":"additional","affiliation":[]},{"given":"P.","family":"Mogg","sequence":"additional","affiliation":[]},{"given":"S.","family":"Mohapatra","sequence":"additional","affiliation":[]},{"given":"R.","family":"Moles-Valls","sequence":"additional","affiliation":[]},{"given":"M. 
C.","family":"Mondragon","sequence":"additional","affiliation":[]},{"given":"K.","family":"M\u00f6nig","sequence":"additional","affiliation":[]},{"given":"J.","family":"Monk","sequence":"additional","affiliation":[]},{"given":"E.","family":"Monnier","sequence":"additional","affiliation":[]},{"given":"A.","family":"Montalbano","sequence":"additional","affiliation":[]},{"given":"J.","family":"Montejo Berlingen","sequence":"additional","affiliation":[]},{"given":"M.","family":"Montella","sequence":"additional","affiliation":[]},{"given":"F.","family":"Monticelli","sequence":"additional","affiliation":[]},{"given":"N.","family":"Morange","sequence":"additional","affiliation":[]},{"given":"D.","family":"Moreno","sequence":"additional","affiliation":[]},{"given":"M.","family":"Moreno Ll\u00e1cer","sequence":"additional","affiliation":[]},{"given":"C.","family":"Moreno Martinez","sequence":"additional","affiliation":[]},{"given":"P.","family":"Morettini","sequence":"additional","affiliation":[]},{"given":"M.","family":"Morgenstern","sequence":"additional","affiliation":[]},{"given":"S.","family":"Morgenstern","sequence":"additional","affiliation":[]},{"given":"D.","family":"Mori","sequence":"additional","affiliation":[]},{"given":"M.","family":"Morii","sequence":"additional","affiliation":[]},{"given":"M.","family":"Morinaga","sequence":"additional","affiliation":[]},{"given":"V.","family":"Morisbak","sequence":"additional","affiliation":[]},{"given":"A. K.","family":"Morley","sequence":"additional","affiliation":[]},{"given":"G.","family":"Mornacchi","sequence":"additional","affiliation":[]},{"given":"A. 
P.","family":"Morris","sequence":"additional","affiliation":[]},{"given":"L.","family":"Morvaj","sequence":"additional","affiliation":[]},{"given":"P.","family":"Moschovakos","sequence":"additional","affiliation":[]},{"given":"B.","family":"Moser","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mosidze","sequence":"additional","affiliation":[]},{"given":"T.","family":"Moskalets","sequence":"additional","affiliation":[]},{"given":"H. J.","family":"Moss","sequence":"additional","affiliation":[]},{"given":"J.","family":"Moss","sequence":"additional","affiliation":[]},{"given":"E. J. W.","family":"Moyse","sequence":"additional","affiliation":[]},{"given":"S.","family":"Muanza","sequence":"additional","affiliation":[]},{"given":"J.","family":"Mueller","sequence":"additional","affiliation":[]},{"given":"R. S. P.","family":"Mueller","sequence":"additional","affiliation":[]},{"given":"D.","family":"Muenstermann","sequence":"additional","affiliation":[]},{"given":"G. A.","family":"Mullier","sequence":"additional","affiliation":[]},{"given":"D. P.","family":"Mungo","sequence":"additional","affiliation":[]},{"given":"J. L.","family":"Munoz Martinez","sequence":"additional","affiliation":[]},{"given":"F. J.","family":"Munoz Sanchez","sequence":"additional","affiliation":[]},{"given":"P.","family":"Murin","sequence":"additional","affiliation":[]},{"given":"W. J.","family":"Murray","sequence":"additional","affiliation":[]},{"given":"A.","family":"Murrone","sequence":"additional","affiliation":[]},{"given":"M.","family":"Mu\u0161kinja","sequence":"additional","affiliation":[]},{"given":"kinja C.","family":"Mwewa","sequence":"additional","affiliation":[]},{"given":"A. G.","family":"Myagkov","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Myers","sequence":"additional","affiliation":[]},{"given":"J.","family":"Myers","sequence":"additional","affiliation":[]},{"given":"M.","family":"Myska","sequence":"additional","affiliation":[]},{"given":"B. 
P.","family":"Nachman","sequence":"additional","affiliation":[]},{"given":"O.","family":"Nackenhorst","sequence":"additional","affiliation":[]},{"given":"A. Nag","family":"Nag","sequence":"additional","affiliation":[]},{"given":"K.","family":"Nagai","sequence":"additional","affiliation":[]},{"given":"K.","family":"Nagano","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Nagasaka","sequence":"additional","affiliation":[]},{"given":"J. L.","family":"Nagle","sequence":"additional","affiliation":[]},{"given":"E.","family":"Nagy","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Nairz","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Nakahama","sequence":"additional","affiliation":[]},{"given":"K.","family":"Nakamura","sequence":"additional","affiliation":[]},{"given":"T.","family":"Nakamura","sequence":"additional","affiliation":[]},{"given":"I.","family":"Nakano","sequence":"additional","affiliation":[]},{"given":"H.","family":"Nanjo","sequence":"additional","affiliation":[]},{"given":"F.","family":"Napolitano","sequence":"additional","affiliation":[]},{"given":"R. F.","family":"Naranjo Garcia","sequence":"additional","affiliation":[]},{"given":"R.","family":"Narayan","sequence":"additional","affiliation":[]},{"given":"I.","family":"Naryshkin","sequence":"additional","affiliation":[]},{"given":"T.","family":"Naumann","sequence":"additional","affiliation":[]},{"given":"G.","family":"Navarro","sequence":"additional","affiliation":[]},{"given":"P. Y.","family":"Nechaeva","sequence":"additional","affiliation":[]},{"given":"F.","family":"Nechansky","sequence":"additional","affiliation":[]},{"given":"T. J.","family":"Neep","sequence":"additional","affiliation":[]},{"given":"A.","family":"Negri","sequence":"additional","affiliation":[]},{"given":"M.","family":"Negrini","sequence":"additional","affiliation":[]},{"given":"C.","family":"Nellist","sequence":"additional","affiliation":[]},{"given":"M. 
E.","family":"Nelson","sequence":"additional","affiliation":[]},{"given":"S.","family":"Nemecek","sequence":"additional","affiliation":[]},{"given":"M.","family":"Nessi","sequence":"additional","affiliation":[]},{"given":"M. S.","family":"Neubauer","sequence":"additional","affiliation":[]},{"given":"F.","family":"Neuhaus","sequence":"additional","affiliation":[]},{"given":"M.","family":"Neumann","sequence":"additional","affiliation":[]},{"given":"R.","family":"Newhouse","sequence":"additional","affiliation":[]},{"given":"P. R.","family":"Newman","sequence":"additional","affiliation":[]},{"given":"C. W.","family":"Ng","sequence":"additional","affiliation":[]},{"given":"Y. S.","family":"Ng","sequence":"additional","affiliation":[]},{"given":"Y. W. Y.","family":"Ng","sequence":"additional","affiliation":[]},{"given":"B.","family":"Ngair","sequence":"additional","affiliation":[]},{"given":"H. D. N.","family":"Nguyen","sequence":"additional","affiliation":[]},{"given":"T.","family":"Nguyen Manh","sequence":"additional","affiliation":[]},{"given":"E.","family":"Nibigira","sequence":"additional","affiliation":[]},{"given":"R. B.","family":"Nickerson","sequence":"additional","affiliation":[]},{"given":"R.","family":"Nicolaidou","sequence":"additional","affiliation":[]},{"given":"D. S.","family":"Nielsen","sequence":"additional","affiliation":[]},{"given":"J.","family":"Nielsen","sequence":"additional","affiliation":[]},{"given":"N.","family":"Nikiforou","sequence":"additional","affiliation":[]},{"given":"V.","family":"Nikolaenko","sequence":"additional","affiliation":[]},{"given":"I.","family":"Nikolic-Audit","sequence":"additional","affiliation":[]},{"given":"K.","family":"Nikolopoulos","sequence":"additional","affiliation":[]},{"given":"P.","family":"Nilsson","sequence":"additional","affiliation":[]},{"given":"H. 
R.","family":"Nindhito","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Ninomiya","sequence":"additional","affiliation":[]},{"given":"A.","family":"Nisati","sequence":"additional","affiliation":[]},{"given":"N.","family":"Nishu","sequence":"additional","affiliation":[]},{"given":"R.","family":"Nisius","sequence":"additional","affiliation":[]},{"given":"I.","family":"Nitsche","sequence":"additional","affiliation":[]},{"given":"T.","family":"Nitta","sequence":"additional","affiliation":[]},{"given":"T.","family":"Nobe","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Noguchi","sequence":"additional","affiliation":[]},{"given":"I.","family":"Nomidis","sequence":"additional","affiliation":[]},{"given":"M. A.","family":"Nomura","sequence":"additional","affiliation":[]},{"given":"M.","family":"Nordberg","sequence":"additional","affiliation":[]},{"given":"T.","family":"Novak","sequence":"additional","affiliation":[]},{"given":"O.","family":"Novgorodova","sequence":"additional","affiliation":[]},{"given":"R.","family":"Novotny","sequence":"additional","affiliation":[]},{"given":"L.","family":"Nozka","sequence":"additional","affiliation":[]},{"given":"K.","family":"Ntekas","sequence":"additional","affiliation":[]},{"given":"E.","family":"Nurse","sequence":"additional","affiliation":[]},{"given":"F. G.","family":"Oakham","sequence":"additional","affiliation":[]},{"given":"H.","family":"Oberlack","sequence":"additional","affiliation":[]},{"given":"J.","family":"Ocariz","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ochi","sequence":"additional","affiliation":[]},{"given":"I.","family":"Ochoa","sequence":"additional","affiliation":[]},{"given":"J. 
P.","family":"Ochoa-Ricoux","sequence":"additional","affiliation":[]},{"given":"K.","family":"O\u2019Connor","sequence":"additional","affiliation":[]},{"given":"S.","family":"Oda","sequence":"additional","affiliation":[]},{"given":"S.","family":"Odaka","sequence":"additional","affiliation":[]},{"given":"S.","family":"Oerdek","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ogrodnik","sequence":"additional","affiliation":[]},{"given":"A.","family":"Oh","sequence":"additional","affiliation":[]},{"given":"S. H.","family":"Oh","sequence":"additional","affiliation":[]},{"given":"C. C.","family":"Ohm","sequence":"additional","affiliation":[]},{"given":"H.","family":"Oide","sequence":"additional","affiliation":[]},{"given":"M. L.","family":"Ojeda","sequence":"additional","affiliation":[]},{"given":"H.","family":"Okawa","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Okazaki","sequence":"additional","affiliation":[]},{"given":"M. W.","family":"O\u2019Keefe","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Okumura","sequence":"additional","affiliation":[]},{"given":"T.","family":"Okuyama","sequence":"additional","affiliation":[]},{"given":"A.","family":"Olariu","sequence":"additional","affiliation":[]},{"given":"L. F.","family":"Oleiro Seabra","sequence":"additional","affiliation":[]},{"given":"S. A.","family":"Olivares Pino","sequence":"additional","affiliation":[]},{"given":"D.","family":"Oliveira Damazio","sequence":"additional","affiliation":[]},{"given":"J. L.","family":"Oliver","sequence":"additional","affiliation":[]},{"given":"M. J. R.","family":"Olsson","sequence":"additional","affiliation":[]},{"given":"A.","family":"Olszewski","sequence":"additional","affiliation":[]},{"given":"J.","family":"Olszowska","sequence":"additional","affiliation":[]},{"given":"D. C.","family":"O\u2019Neil","sequence":"additional","affiliation":[]},{"given":"A. 
P.","family":"O\u2019neill","sequence":"additional","affiliation":[]},{"given":"A.","family":"Onofre","sequence":"additional","affiliation":[]},{"given":"P. U. E.","family":"Onyisi","sequence":"additional","affiliation":[]},{"given":"H.","family":"Oppen","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Oreglia","sequence":"additional","affiliation":[]},{"given":"G. E.","family":"Orellana","sequence":"additional","affiliation":[]},{"given":"D.","family":"Orestano","sequence":"additional","affiliation":[]},{"given":"N.","family":"Orlando","sequence":"additional","affiliation":[]},{"given":"R. S.","family":"Orr","sequence":"additional","affiliation":[]},{"given":"V.","family":"O\u2019Shea","sequence":"additional","affiliation":[]},{"given":"R.","family":"Ospanov","sequence":"additional","affiliation":[]},{"given":"G.","family":"Otero y Garzon","sequence":"additional","affiliation":[]},{"given":"H.","family":"Otono","sequence":"additional","affiliation":[]},{"given":"P. S.","family":"Ott","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ouchrif","sequence":"additional","affiliation":[]},{"given":"J.","family":"Ouellette","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ould-Saada","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ouraou","sequence":"additional","affiliation":[]},{"given":"Q.","family":"Ouyang","sequence":"additional","affiliation":[]},{"given":"M.","family":"Owen","sequence":"additional","affiliation":[]},{"given":"R. E.","family":"Owen","sequence":"additional","affiliation":[]},{"given":"V. E.","family":"Ozcan","sequence":"additional","affiliation":[]},{"given":"N.","family":"Ozturk","sequence":"additional","affiliation":[]},{"given":"J.","family":"Pacalt","sequence":"additional","affiliation":[]},{"given":"H. 
A.","family":"Pacey","sequence":"additional","affiliation":[]},{"given":"K.","family":"Pachal","sequence":"additional","affiliation":[]},{"given":"A.","family":"Pacheco Pages","sequence":"additional","affiliation":[]},{"given":"C.","family":"Padilla Aranda","sequence":"additional","affiliation":[]},{"given":"S.","family":"Pagan Griso","sequence":"additional","affiliation":[]},{"given":"M.","family":"Paganini","sequence":"additional","affiliation":[]},{"given":"G.","family":"Palacino","sequence":"additional","affiliation":[]},{"given":"S.","family":"Palazzo","sequence":"additional","affiliation":[]},{"given":"S.","family":"Palestini","sequence":"additional","affiliation":[]},{"given":"M.","family":"Palka","sequence":"additional","affiliation":[]},{"given":"D.","family":"Pallin","sequence":"additional","affiliation":[]},{"given":"P.","family":"Palni","sequence":"additional","affiliation":[]},{"given":"I.","family":"Panagoulias","sequence":"additional","affiliation":[]},{"given":"C. E.","family":"Pandini","sequence":"additional","affiliation":[]},{"given":"J. G.","family":"Panduro Vazquez","sequence":"additional","affiliation":[]},{"given":"P.","family":"Pani","sequence":"additional","affiliation":[]},{"given":"G.","family":"Panizzo","sequence":"additional","affiliation":[]},{"given":"L.","family":"Paolozzi","sequence":"additional","affiliation":[]},{"given":"C.","family":"Papadatos","sequence":"additional","affiliation":[]},{"given":"K.","family":"Papageorgiou","sequence":"additional","affiliation":[]},{"given":"S.","family":"Parajuli","sequence":"additional","affiliation":[]},{"given":"A.","family":"Paramonov","sequence":"additional","affiliation":[]},{"given":"D.","family":"Paredes Hernandez","sequence":"additional","affiliation":[]},{"given":"S. R.","family":"Paredes Saenz","sequence":"additional","affiliation":[]},{"given":"B.","family":"Parida","sequence":"additional","affiliation":[]},{"given":"T. 
H.","family":"Park","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Parker","sequence":"additional","affiliation":[]},{"given":"M. A.","family":"Parker","sequence":"additional","affiliation":[]},{"given":"F.","family":"Parodi","sequence":"additional","affiliation":[]},{"given":"E. W.","family":"Parrish","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Parsons","sequence":"additional","affiliation":[]},{"given":"U.","family":"Parzefall","sequence":"additional","affiliation":[]},{"given":"L.","family":"Pascual Dominguez","sequence":"additional","affiliation":[]},{"given":"V. R.","family":"Pascuzzi","sequence":"additional","affiliation":[]},{"given":"J. M. P.","family":"Pasner","sequence":"additional","affiliation":[]},{"given":"F.","family":"Pasquali","sequence":"additional","affiliation":[]},{"given":"E.","family":"Pasqualucci","sequence":"additional","affiliation":[]},{"given":"S.","family":"Passaggio","sequence":"additional","affiliation":[]},{"given":"F.","family":"Pastore","sequence":"additional","affiliation":[]},{"given":"P.","family":"Pasuwan","sequence":"additional","affiliation":[]},{"given":"S.","family":"Pataraia","sequence":"additional","affiliation":[]},{"given":"J. R.","family":"Pater","sequence":"additional","affiliation":[]},{"given":"A.","family":"Pathak","sequence":"additional","affiliation":[]},{"given":"J.","family":"Patton","sequence":"additional","affiliation":[]},{"given":"T.","family":"Pauly","sequence":"additional","affiliation":[]},{"given":"J.","family":"Pearkes","sequence":"additional","affiliation":[]},{"given":"B.","family":"Pearson","sequence":"additional","affiliation":[]},{"given":"M.","family":"Pedersen","sequence":"additional","affiliation":[]},{"given":"L.","family":"Pedraza Diaz","sequence":"additional","affiliation":[]},{"given":"R.","family":"Pedro","sequence":"additional","affiliation":[]},{"given":"T.","family":"Peiffer","sequence":"additional","affiliation":[]},{"given":"S. 
V.","family":"Peleganchuk","sequence":"additional","affiliation":[]},{"given":"O.","family":"Penc","sequence":"additional","affiliation":[]},{"given":"H.","family":"Peng","sequence":"additional","affiliation":[]},{"given":"B. S.","family":"Peralva","sequence":"additional","affiliation":[]},{"given":"M. M.","family":"Perego","sequence":"additional","affiliation":[]},{"given":"A. P.","family":"Pereira Peixoto","sequence":"additional","affiliation":[]},{"given":"L.","family":"Pereira Sanchez","sequence":"additional","affiliation":[]},{"given":"D. V.","family":"Perepelitsa","sequence":"additional","affiliation":[]},{"given":"F.","family":"Peri","sequence":"additional","affiliation":[]},{"given":"L.","family":"Perini","sequence":"additional","affiliation":[]},{"given":"H.","family":"Pernegger","sequence":"additional","affiliation":[]},{"given":"S.","family":"Perrella","sequence":"additional","affiliation":[]},{"given":"A.","family":"Perrevoort","sequence":"additional","affiliation":[]},{"given":"K.","family":"Peters","sequence":"additional","affiliation":[]},{"given":"R. F. Y.","family":"Peters","sequence":"additional","affiliation":[]},{"given":"B. A.","family":"Petersen","sequence":"additional","affiliation":[]},{"given":"T. C.","family":"Petersen","sequence":"additional","affiliation":[]},{"given":"E.","family":"Petit","sequence":"additional","affiliation":[]},{"given":"A.","family":"Petridis","sequence":"additional","affiliation":[]},{"given":"C.","family":"Petridou","sequence":"additional","affiliation":[]},{"given":"M.","family":"Petrov","sequence":"additional","affiliation":[]},{"given":"F.","family":"Petrucci","sequence":"additional","affiliation":[]},{"given":"M.","family":"Pettee","sequence":"additional","affiliation":[]},{"given":"N. 
E.","family":"Pettersson","sequence":"additional","affiliation":[]},{"given":"K.","family":"Petukhova","sequence":"additional","affiliation":[]},{"given":"A.","family":"Peyaud","sequence":"additional","affiliation":[]},{"given":"R.","family":"Pezoa","sequence":"additional","affiliation":[]},{"given":"L.","family":"Pezzotti","sequence":"additional","affiliation":[]},{"given":"T.","family":"Pham","sequence":"additional","affiliation":[]},{"given":"F. H.","family":"Phillips","sequence":"additional","affiliation":[]},{"given":"P. W.","family":"Phillips","sequence":"additional","affiliation":[]},{"given":"M. W.","family":"Phipps","sequence":"additional","affiliation":[]},{"given":"G.","family":"Piacquadio","sequence":"additional","affiliation":[]},{"given":"E.","family":"Pianori","sequence":"additional","affiliation":[]},{"given":"A.","family":"Picazio","sequence":"additional","affiliation":[]},{"given":"R. H.","family":"Pickles","sequence":"additional","affiliation":[]},{"given":"R.","family":"Piegaia","sequence":"additional","affiliation":[]},{"given":"D.","family":"Pietreanu","sequence":"additional","affiliation":[]},{"given":"J. E.","family":"Pilcher","sequence":"additional","affiliation":[]},{"given":"A. D.","family":"Pilkington","sequence":"additional","affiliation":[]},{"given":"M.","family":"Pinamonti","sequence":"additional","affiliation":[]},{"given":"J. 
L.","family":"Pinfold","sequence":"additional","affiliation":[]},{"given":"M.","family":"Pitt","sequence":"additional","affiliation":[]},{"given":"L.","family":"Pizzimento","sequence":"additional","affiliation":[]},{"given":"M.-A.","family":"Pleier","sequence":"additional","affiliation":[]},{"given":"V.","family":"Pleskot","sequence":"additional","affiliation":[]},{"given":"E.","family":"Plotnikova","sequence":"additional","affiliation":[]},{"given":"P.","family":"Podberezko","sequence":"additional","affiliation":[]},{"given":"R.","family":"Poettgen","sequence":"additional","affiliation":[]},{"given":"R.","family":"Poggi","sequence":"additional","affiliation":[]},{"given":"L.","family":"Poggioli","sequence":"additional","affiliation":[]},{"given":"I.","family":"Pogrebnyak","sequence":"additional","affiliation":[]},{"given":"D.","family":"Pohl","sequence":"additional","affiliation":[]},{"given":"I.","family":"Pokharel","sequence":"additional","affiliation":[]},{"given":"G.","family":"Polesello","sequence":"additional","affiliation":[]},{"given":"A.","family":"Poley","sequence":"additional","affiliation":[]},{"given":"A.","family":"Policicchio","sequence":"additional","affiliation":[]},{"given":"R.","family":"Polifka","sequence":"additional","affiliation":[]},{"given":"A.","family":"Polini","sequence":"additional","affiliation":[]},{"given":"C. S.","family":"Pollard","sequence":"additional","affiliation":[]},{"given":"V.","family":"Polychronakos","sequence":"additional","affiliation":[]},{"given":"D.","family":"Ponomarenko","sequence":"additional","affiliation":[]},{"given":"L.","family":"Pontecorvo","sequence":"additional","affiliation":[]},{"given":"S.","family":"Popa","sequence":"additional","affiliation":[]},{"given":"G. A.","family":"Popeneciu","sequence":"additional","affiliation":[]},{"given":"L.","family":"Portales","sequence":"additional","affiliation":[]},{"given":"D. 
M.","family":"Portillo Quintero","sequence":"additional","affiliation":[]},{"given":"S.","family":"Pospisil","sequence":"additional","affiliation":[]},{"given":"K.","family":"Potamianos","sequence":"additional","affiliation":[]},{"given":"I. N.","family":"Potrap","sequence":"additional","affiliation":[]},{"given":"C. J.","family":"Potter","sequence":"additional","affiliation":[]},{"given":"H.","family":"Potti","sequence":"additional","affiliation":[]},{"given":"T.","family":"Poulsen","sequence":"additional","affiliation":[]},{"given":"J.","family":"Poveda","sequence":"additional","affiliation":[]},{"given":"T. D.","family":"Powell","sequence":"additional","affiliation":[]},{"given":"G.","family":"Pownall","sequence":"additional","affiliation":[]},{"given":"M. E.","family":"Pozo Astigarraga","sequence":"additional","affiliation":[]},{"given":"P.","family":"Pralavorio","sequence":"additional","affiliation":[]},{"given":"S.","family":"Prell","sequence":"additional","affiliation":[]},{"given":"D.","family":"Price","sequence":"additional","affiliation":[]},{"given":"M.","family":"Primavera","sequence":"additional","affiliation":[]},{"given":"S.","family":"Prince","sequence":"additional","affiliation":[]},{"given":"M. 
L.","family":"Proffitt","sequence":"additional","affiliation":[]},{"given":"N.","family":"Proklova","sequence":"additional","affiliation":[]},{"given":"K.","family":"Prokofiev","sequence":"additional","affiliation":[]},{"given":"F.","family":"Prokoshin","sequence":"additional","affiliation":[]},{"given":"S.","family":"Protopopescu","sequence":"additional","affiliation":[]},{"given":"J.","family":"Proudfoot","sequence":"additional","affiliation":[]},{"given":"M.","family":"Przybycien","sequence":"additional","affiliation":[]},{"given":"D.","family":"Pudzha","sequence":"additional","affiliation":[]},{"given":"A.","family":"Puri","sequence":"additional","affiliation":[]},{"given":"P.","family":"Puzo","sequence":"additional","affiliation":[]},{"given":"J.","family":"Qian","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Qin","sequence":"additional","affiliation":[]},{"given":"A.","family":"Quadt","sequence":"additional","affiliation":[]},{"given":"M.","family":"Queitsch-Maitland","sequence":"additional","affiliation":[]},{"given":"A.","family":"Qureshi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Racko","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ragusa","sequence":"additional","affiliation":[]},{"given":"G.","family":"Rahal","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Raine","sequence":"additional","affiliation":[]},{"given":"S.","family":"Rajagopalan","sequence":"additional","affiliation":[]},{"given":"A. Ramirez","family":"Morales","sequence":"additional","affiliation":[]},{"given":"K.","family":"Ran","sequence":"additional","affiliation":[]},{"given":"T.","family":"Rashid","sequence":"additional","affiliation":[]},{"given":"S.","family":"Raspopov","sequence":"additional","affiliation":[]},{"given":"D. 
M.","family":"Rauch","sequence":"additional","affiliation":[]},{"given":"F.","family":"Rauscher","sequence":"additional","affiliation":[]},{"given":"S.","family":"Rave","sequence":"additional","affiliation":[]},{"given":"B.","family":"Ravina","sequence":"additional","affiliation":[]},{"given":"I.","family":"Ravinovich","sequence":"additional","affiliation":[]},{"given":"J. H.","family":"Rawling","sequence":"additional","affiliation":[]},{"given":"M.","family":"Raymond","sequence":"additional","affiliation":[]},{"given":"A. L.","family":"Read","sequence":"additional","affiliation":[]},{"given":"N. P.","family":"Readioff","sequence":"additional","affiliation":[]},{"given":"M.","family":"Reale","sequence":"additional","affiliation":[]},{"given":"D. M.","family":"Rebuzzi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Redelbach","sequence":"additional","affiliation":[]},{"given":"G.","family":"Redlinger","sequence":"additional","affiliation":[]},{"given":"K.","family":"Reeves","sequence":"additional","affiliation":[]},{"given":"L.","family":"Rehnisch","sequence":"additional","affiliation":[]},{"given":"J.","family":"Reichert","sequence":"additional","affiliation":[]},{"given":"D.","family":"Reikher","sequence":"additional","affiliation":[]},{"given":"A.","family":"Reiss","sequence":"additional","affiliation":[]},{"given":"A.","family":"Rej","sequence":"additional","affiliation":[]},{"given":"C.","family":"Rembser","sequence":"additional","affiliation":[]},{"given":"A.","family":"Renardi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Renda","sequence":"additional","affiliation":[]},{"given":"M.","family":"Rescigno","sequence":"additional","affiliation":[]},{"given":"S.","family":"Resconi","sequence":"additional","affiliation":[]},{"given":"E. 
D.","family":"Resseguie","sequence":"additional","affiliation":[]},{"given":"S.","family":"Rettie","sequence":"additional","affiliation":[]},{"given":"B.","family":"Reynolds","sequence":"additional","affiliation":[]},{"given":"E.","family":"Reynolds","sequence":"additional","affiliation":[]},{"given":"O. L.","family":"Rezanova","sequence":"additional","affiliation":[]},{"given":"P.","family":"Reznicek","sequence":"additional","affiliation":[]},{"given":"E.","family":"Ricci","sequence":"additional","affiliation":[]},{"given":"R.","family":"Richter","sequence":"additional","affiliation":[]},{"given":"S.","family":"Richter","sequence":"additional","affiliation":[]},{"given":"E.","family":"Richter-Was","sequence":"additional","affiliation":[]},{"given":"O.","family":"Ricken","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ridel","sequence":"additional","affiliation":[]},{"given":"P.","family":"Rieck","sequence":"additional","affiliation":[]},{"given":"O.","family":"Rifki","sequence":"additional","affiliation":[]},{"given":"M.","family":"Rijssenbeek","sequence":"additional","affiliation":[]},{"given":"A.","family":"Rimoldi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Rimoldi","sequence":"additional","affiliation":[]},{"given":"L.","family":"Rinaldi","sequence":"additional","affiliation":[]},{"given":"G.","family":"Ripellino","sequence":"additional","affiliation":[]},{"given":"I.","family":"Riu","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Rivera Vergara","sequence":"additional","affiliation":[]},{"given":"F.","family":"Rizatdinova","sequence":"additional","affiliation":[]},{"given":"E.","family":"Rizvi","sequence":"additional","affiliation":[]},{"given":"C.","family":"Rizzi","sequence":"additional","affiliation":[]},{"given":"R. T.","family":"Roberts","sequence":"additional","affiliation":[]},{"given":"S. 
H.","family":"Robertson","sequence":"additional","affiliation":[]},{"given":"M.","family":"Robin","sequence":"additional","affiliation":[]},{"given":"D.","family":"Robinson","sequence":"additional","affiliation":[]},{"given":"C. M.","family":"Robles Gajardo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Robles Manzano","sequence":"additional","affiliation":[]},{"given":"A.","family":"Robson","sequence":"additional","affiliation":[]},{"given":"A.","family":"Rocchi","sequence":"additional","affiliation":[]},{"given":"E.","family":"Rocco","sequence":"additional","affiliation":[]},{"given":"C.","family":"Roda","sequence":"additional","affiliation":[]},{"given":"S.","family":"Rodriguez Bosca","sequence":"additional","affiliation":[]},{"given":"A.","family":"Rodriguez Perez","sequence":"additional","affiliation":[]},{"given":"D.","family":"Rodriguez Rodriguez","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Rodr\u00edguez Vera","sequence":"additional","affiliation":[]},{"given":"S.","family":"Roe","sequence":"additional","affiliation":[]},{"given":"O.","family":"R\u00f8hne","sequence":"additional","affiliation":[]},{"given":"R.","family":"R\u00f6hrig","sequence":"additional","affiliation":[]},{"given":"R. A.","family":"Rojas","sequence":"additional","affiliation":[]},{"given":"B.","family":"Roland","sequence":"additional","affiliation":[]},{"given":"C. P. 
A.","family":"Roland","sequence":"additional","affiliation":[]},{"given":"J.","family":"Roloff","sequence":"additional","affiliation":[]},{"given":"A.","family":"Romaniouk","sequence":"additional","affiliation":[]},{"given":"M.","family":"Romano","sequence":"additional","affiliation":[]},{"given":"N.","family":"Rompotis","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ronzani","sequence":"additional","affiliation":[]},{"given":"L.","family":"Roos","sequence":"additional","affiliation":[]},{"given":"S.","family":"Rosati","sequence":"additional","affiliation":[]},{"given":"G.","family":"Rosin","sequence":"additional","affiliation":[]},{"given":"B. J.","family":"Rosser","sequence":"additional","affiliation":[]},{"given":"E.","family":"Rossi","sequence":"additional","affiliation":[]},{"given":"E.","family":"Rossi","sequence":"additional","affiliation":[]},{"given":"E.","family":"Rossi","sequence":"additional","affiliation":[]},{"given":"L. P.","family":"Rossi","sequence":"additional","affiliation":[]},{"given":"L.","family":"Rossini","sequence":"additional","affiliation":[]},{"given":"R.","family":"Rosten","sequence":"additional","affiliation":[]},{"given":"M.","family":"Rotaru","sequence":"additional","affiliation":[]},{"given":"J.","family":"Rothberg","sequence":"additional","affiliation":[]},{"given":"B.","family":"Rottler","sequence":"additional","affiliation":[]},{"given":"D.","family":"Rousseau","sequence":"additional","affiliation":[]},{"given":"G.","family":"Rovelli","sequence":"additional","affiliation":[]},{"given":"A.","family":"Roy","sequence":"additional","affiliation":[]},{"given":"D.","family":"Roy","sequence":"additional","affiliation":[]},{"given":"A.","family":"Rozanov","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Rozen","sequence":"additional","affiliation":[]},{"given":"X.","family":"Ruan","sequence":"additional","affiliation":[]},{"given":"F.","family":"R\u00fchr","sequence":"additional","affiliation":[]},{"giv
en":"A.","family":"Ruiz-Martinez","sequence":"additional","affiliation":[]},{"given":"A.","family":"Rummler","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Rurikova","sequence":"additional","affiliation":[]},{"given":"N. A.","family":"Rusakovich","sequence":"additional","affiliation":[]},{"given":"H. L.","family":"Russell","sequence":"additional","affiliation":[]},{"given":"L.","family":"Rustige","sequence":"additional","affiliation":[]},{"given":"J. P.","family":"Rutherfoord","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"R\u00fcttinger","sequence":"additional","affiliation":[]},{"given":"M.","family":"Rybar","sequence":"additional","affiliation":[]},{"given":"G.","family":"Rybkin","sequence":"additional","affiliation":[]},{"given":"E. B.","family":"Rye","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ryzhov","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Sabater Iglesias","sequence":"additional","affiliation":[]},{"given":"P.","family":"Sabatini","sequence":"additional","affiliation":[]},{"given":"G.","family":"Sabato","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sacerdoti","sequence":"additional","affiliation":[]},{"given":"H. 
F-W.","family":"Sadrozinski","sequence":"additional","affiliation":[]},{"given":"R.","family":"Sadykov","sequence":"additional","affiliation":[]},{"given":"F.","family":"Safai Tehrani","sequence":"additional","affiliation":[]},{"given":"B.","family":"Safarzadeh Samani","sequence":"additional","affiliation":[]},{"given":"M.","family":"Safdari","sequence":"additional","affiliation":[]},{"given":"P.","family":"Saha","sequence":"additional","affiliation":[]},{"given":"S.","family":"Saha","sequence":"additional","affiliation":[]},{"given":"M.","family":"Sahinsoy","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sahu","sequence":"additional","affiliation":[]},{"given":"M.","family":"Saimpert","sequence":"additional","affiliation":[]},{"given":"M.","family":"Saito","sequence":"additional","affiliation":[]},{"given":"T.","family":"Saito","sequence":"additional","affiliation":[]},{"given":"H.","family":"Sakamoto","sequence":"additional","affiliation":[]},{"given":"D.","family":"Salamani","sequence":"additional","affiliation":[]},{"given":"G.","family":"Salamanna","sequence":"additional","affiliation":[]},{"given":"J. 
E.","family":"Salazar Loyola","sequence":"additional","affiliation":[]},{"given":"A.","family":"Salnikov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Salt","sequence":"additional","affiliation":[]},{"given":"D.","family":"Salvatore","sequence":"additional","affiliation":[]},{"given":"F.","family":"Salvatore","sequence":"additional","affiliation":[]},{"given":"A.","family":"Salvucci","sequence":"additional","affiliation":[]},{"given":"A.","family":"Salzburger","sequence":"additional","affiliation":[]},{"given":"J.","family":"Samarati","sequence":"additional","affiliation":[]},{"given":"D.","family":"Sammel","sequence":"additional","affiliation":[]},{"given":"D.","family":"Sampsonidis","sequence":"additional","affiliation":[]},{"given":"D.","family":"Sampsonidou","sequence":"additional","affiliation":[]},{"given":"J.","family":"S\u00e1nchez","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sanchez Pineda","sequence":"additional","affiliation":[]},{"given":"H.","family":"Sandaker","sequence":"additional","affiliation":[]},{"given":"C. O.","family":"Sander","sequence":"additional","affiliation":[]},{"given":"I. G.","family":"Sanderswood","sequence":"additional","affiliation":[]},{"given":"M.","family":"Sandhoff","sequence":"additional","affiliation":[]},{"given":"C.","family":"Sandoval","sequence":"additional","affiliation":[]},{"given":"D. P. C.","family":"Sankey","sequence":"additional","affiliation":[]},{"given":"M.","family":"Sannino","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Sano","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sansoni","sequence":"additional","affiliation":[]},{"given":"C.","family":"Santoni","sequence":"additional","affiliation":[]},{"given":"H.","family":"Santos","sequence":"additional","affiliation":[]},{"given":"S. 
N.","family":"Santpur","sequence":"additional","affiliation":[]},{"given":"A.","family":"Santra","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sapronov","sequence":"additional","affiliation":[]},{"given":"J. G.","family":"Saraiva","sequence":"additional","affiliation":[]},{"given":"J.","family":"Sardain","sequence":"additional","affiliation":[]},{"given":"O.","family":"Sasaki","sequence":"additional","affiliation":[]},{"given":"K.","family":"Sato","sequence":"additional","affiliation":[]},{"given":"F.","family":"Sauerburger","sequence":"additional","affiliation":[]},{"given":"E.","family":"Sauvan","sequence":"additional","affiliation":[]},{"given":"P.","family":"Savard","sequence":"additional","affiliation":[]},{"given":"R.","family":"Sawada","sequence":"additional","affiliation":[]},{"given":"C.","family":"Sawyer","sequence":"additional","affiliation":[]},{"given":"L.","family":"Sawyer","sequence":"additional","affiliation":[]},{"given":"C.","family":"Sbarra","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sbrizzi","sequence":"additional","affiliation":[]},{"given":"T.","family":"Scanlon","sequence":"additional","affiliation":[]},{"given":"J.","family":"Schaarschmidt","sequence":"additional","affiliation":[]},{"given":"P.","family":"Schacht","sequence":"additional","affiliation":[]},{"given":"B. M.","family":"Schachtner","sequence":"additional","affiliation":[]},{"given":"D.","family":"Schaefer","sequence":"additional","affiliation":[]},{"given":"L.","family":"Schaefer","sequence":"additional","affiliation":[]},{"given":"J.","family":"Schaeffer","sequence":"additional","affiliation":[]},{"given":"S.","family":"Schaepe","sequence":"additional","affiliation":[]},{"given":"U.","family":"Sch\u00e4fer","sequence":"additional","affiliation":[]},{"given":"A. C.","family":"Schaffer","sequence":"additional","affiliation":[]},{"given":"D.","family":"Schaile","sequence":"additional","affiliation":[]},{"given":"R. 
D.","family":"Schamberger","sequence":"additional","affiliation":[]},{"given":"N.","family":"Scharmberg","sequence":"additional","affiliation":[]},{"given":"V. A.","family":"Schegelsky","sequence":"additional","affiliation":[]},{"given":"D.","family":"Scheirich","sequence":"additional","affiliation":[]},{"given":"F.","family":"Schenck","sequence":"additional","affiliation":[]},{"given":"M.","family":"Schernau","sequence":"additional","affiliation":[]},{"given":"C.","family":"Schiavi","sequence":"additional","affiliation":[]},{"given":"L. K.","family":"Schildgen","sequence":"additional","affiliation":[]},{"given":"Z. M.","family":"Schillaci","sequence":"additional","affiliation":[]},{"given":"E. J.","family":"Schioppa","sequence":"additional","affiliation":[]},{"given":"M.","family":"Schioppa","sequence":"additional","affiliation":[]},{"given":"K. E.","family":"Schleicher","sequence":"additional","affiliation":[]},{"given":"S.","family":"Schlenker","sequence":"additional","affiliation":[]},{"given":"K. R.","family":"Schmidt-Sommerfeld","sequence":"additional","affiliation":[]},{"given":"K.","family":"Schmieden","sequence":"additional","affiliation":[]},{"given":"C.","family":"Schmitt","sequence":"additional","affiliation":[]},{"given":"S.","family":"Schmitt","sequence":"additional","affiliation":[]},{"given":"S.","family":"Schmitz","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Schmoeckel","sequence":"additional","affiliation":[]},{"given":"L.","family":"Schoeffel","sequence":"additional","affiliation":[]},{"given":"A.","family":"Schoening","sequence":"additional","affiliation":[]},{"given":"P. G.","family":"Scholer","sequence":"additional","affiliation":[]},{"given":"E.","family":"Schopf","sequence":"additional","affiliation":[]},{"given":"M.","family":"Schott","sequence":"additional","affiliation":[]},{"given":"J. F. 
P.","family":"Schouwenberg","sequence":"additional","affiliation":[]},{"given":"J.","family":"Schovancova","sequence":"additional","affiliation":[]},{"given":"S.","family":"Schramm","sequence":"additional","affiliation":[]},{"given":"F.","family":"Schroeder","sequence":"additional","affiliation":[]},{"given":"A.","family":"Schulte","sequence":"additional","affiliation":[]},{"given":"H-C.","family":"Schultz-Coulon","sequence":"additional","affiliation":[]},{"given":"M.","family":"Schumacher","sequence":"additional","affiliation":[]},{"given":"B. A.","family":"Schumm","sequence":"additional","affiliation":[]},{"given":"Ph.","family":"Schune","sequence":"additional","affiliation":[]},{"given":"A.","family":"Schwartzman","sequence":"additional","affiliation":[]},{"given":"T. A.","family":"Schwarz","sequence":"additional","affiliation":[]},{"given":"Ph.","family":"Schwemling","sequence":"additional","affiliation":[]},{"given":"R.","family":"Schwienhorst","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sciandra","sequence":"additional","affiliation":[]},{"given":"G.","family":"Sciolla","sequence":"additional","affiliation":[]},{"given":"M.","family":"Scodeggio","sequence":"additional","affiliation":[]},{"given":"M.","family":"Scornajenghi","sequence":"additional","affiliation":[]},{"given":"F.","family":"Scuri","sequence":"additional","affiliation":[]},{"given":"F.","family":"Scutti","sequence":"additional","affiliation":[]},{"given":"L. M.","family":"Scyboz","sequence":"additional","affiliation":[]},{"given":"C. D.","family":"Sebastiani","sequence":"additional","affiliation":[]},{"given":"P.","family":"Seema","sequence":"additional","affiliation":[]},{"given":"S. C.","family":"Seidel","sequence":"additional","affiliation":[]},{"given":"A.","family":"Seiden","sequence":"additional","affiliation":[]},{"given":"B. 
D.","family":"Seidlitz","sequence":"additional","affiliation":[]},{"given":"T.","family":"Seiss","sequence":"additional","affiliation":[]},{"given":"J. M.","family":"Seixas","sequence":"additional","affiliation":[]},{"given":"G.","family":"Sekhniaidze","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"Sekula","sequence":"additional","affiliation":[]},{"given":"N.","family":"Semprini-Cesari","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sen","sequence":"additional","affiliation":[]},{"given":"C.","family":"Serfon","sequence":"additional","affiliation":[]},{"given":"L.","family":"Serin","sequence":"additional","affiliation":[]},{"given":"L.","family":"Serkin","sequence":"additional","affiliation":[]},{"given":"M.","family":"Sessa","sequence":"additional","affiliation":[]},{"given":"H.","family":"Severini","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sevova","sequence":"additional","affiliation":[]},{"given":"T.","family":"\u0160filigoj","sequence":"additional","affiliation":[]},{"given":"F.","family":"Sforza","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sfyrla","sequence":"additional","affiliation":[]},{"given":"E.","family":"Shabalina","sequence":"additional","affiliation":[]},{"given":"J. D.","family":"Shahinian","sequence":"additional","affiliation":[]},{"given":"N. W.","family":"Shaikh","sequence":"additional","affiliation":[]},{"given":"D.","family":"Shaked Renous","sequence":"additional","affiliation":[]},{"given":"L. Y.","family":"Shan","sequence":"additional","affiliation":[]},{"given":"J. T.","family":"Shank","sequence":"additional","affiliation":[]},{"given":"M.","family":"Shapiro","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sharma","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"Sharma","sequence":"additional","affiliation":[]},{"given":"P. 
B.","family":"Shatalov","sequence":"additional","affiliation":[]},{"given":"K.","family":"Shaw","sequence":"additional","affiliation":[]},{"given":"S. M.","family":"Shaw","sequence":"additional","affiliation":[]},{"given":"M.","family":"Shehade","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Shen","sequence":"additional","affiliation":[]},{"given":"A. D.","family":"Sherman","sequence":"additional","affiliation":[]},{"given":"P.","family":"Sherwood","sequence":"additional","affiliation":[]},{"given":"L.","family":"Shi","sequence":"additional","affiliation":[]},{"given":"S.","family":"Shimizu","sequence":"additional","affiliation":[]},{"given":"C. O.","family":"Shimmin","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Shimogama","sequence":"additional","affiliation":[]},{"given":"M.","family":"Shimojima","sequence":"additional","affiliation":[]},{"given":"I. P. J.","family":"Shipsey","sequence":"additional","affiliation":[]},{"given":"S.","family":"Shirabe","sequence":"additional","affiliation":[]},{"given":"M.","family":"Shiyakova","sequence":"additional","affiliation":[]},{"given":"J.","family":"Shlomi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Shmeleva","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Shochet","sequence":"additional","affiliation":[]},{"given":"J.","family":"Shojaii","sequence":"additional","affiliation":[]},{"given":"D. R.","family":"Shope","sequence":"additional","affiliation":[]},{"given":"S.","family":"Shrestha","sequence":"additional","affiliation":[]},{"given":"E. M.","family":"Shrif","sequence":"additional","affiliation":[]},{"given":"E.","family":"Shulga","sequence":"additional","affiliation":[]},{"given":"P.","family":"Sicho","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Sickles","sequence":"additional","affiliation":[]},{"given":"P. 
E.","family":"Sidebo","sequence":"additional","affiliation":[]},{"given":"E.","family":"Sideras Haddad","sequence":"additional","affiliation":[]},{"given":"O.","family":"Sidiropoulou","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sidoti","sequence":"additional","affiliation":[]},{"given":"F.","family":"Siegert","sequence":"additional","affiliation":[]},{"given":"Dj.","family":"Sijacki","sequence":"additional","affiliation":[]},{"given":"M.","family":"Silva","sequence":"additional","affiliation":[]},{"given":"M. V.","family":"Silva Oliveira","sequence":"additional","affiliation":[]},{"given":"S. B.","family":"Silverstein","sequence":"additional","affiliation":[]},{"given":"S.","family":"Simion","sequence":"additional","affiliation":[]},{"given":"R.","family":"Simoniello","sequence":"additional","affiliation":[]},{"given":"C. J.","family":"Simpson-allsop","sequence":"additional","affiliation":[]},{"given":"S.","family":"Simsek","sequence":"additional","affiliation":[]},{"given":"P.","family":"Sinervo","sequence":"additional","affiliation":[]},{"given":"V.","family":"Sinetckii","sequence":"additional","affiliation":[]},{"given":"S.","family":"Singh","sequence":"additional","affiliation":[]},{"given":"M.","family":"Sioli","sequence":"additional","affiliation":[]},{"given":"I.","family":"Siral","sequence":"additional","affiliation":[]},{"given":"S. Yu.","family":"Sivoklokov","sequence":"additional","affiliation":[]},{"given":"J.","family":"Sj\u00f6lin","sequence":"additional","affiliation":[]},{"given":"E.","family":"Skorda","sequence":"additional","affiliation":[]},{"given":"P.","family":"Skubic","sequence":"additional","affiliation":[]},{"given":"M.","family":"Slawinska","sequence":"additional","affiliation":[]},{"given":"K.","family":"Sliwa","sequence":"additional","affiliation":[]},{"given":"R.","family":"Slovak","sequence":"additional","affiliation":[]},{"given":"V.","family":"Smakhtin","sequence":"additional","affiliation":[]},{"given":"B. 
H.","family":"Smart","sequence":"additional","affiliation":[]},{"given":"J.","family":"Smiesko","sequence":"additional","affiliation":[]},{"given":"N.","family":"Smirnov","sequence":"additional","affiliation":[]},{"given":"S. Yu.","family":"Smirnov","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Smirnov","sequence":"additional","affiliation":[]},{"given":"L. N.","family":"Smirnova","sequence":"additional","affiliation":[]},{"given":"O.","family":"Smirnova","sequence":"additional","affiliation":[]},{"given":"J. W.","family":"Smith","sequence":"additional","affiliation":[]},{"given":"M.","family":"Smizanska","sequence":"additional","affiliation":[]},{"given":"K.","family":"Smolek","sequence":"additional","affiliation":[]},{"given":"A.","family":"Smykiewicz","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Snesarev","sequence":"additional","affiliation":[]},{"given":"H. L.","family":"Snoek","sequence":"additional","affiliation":[]},{"given":"I. M.","family":"Snyder","sequence":"additional","affiliation":[]},{"given":"S.","family":"Snyder","sequence":"additional","affiliation":[]},{"given":"R.","family":"Sobie","sequence":"additional","affiliation":[]},{"given":"A.","family":"Soffer","sequence":"additional","affiliation":[]},{"given":"A.","family":"S\u00f8gaard","sequence":"additional","affiliation":[]},{"given":"F.","family":"Sohns","sequence":"additional","affiliation":[]},{"given":"C. A.","family":"Solans Sanchez","sequence":"additional","affiliation":[]},{"given":"E. Yu.","family":"Soldatov","sequence":"additional","affiliation":[]},{"given":"U.","family":"Soldevila","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Solodkov","sequence":"additional","affiliation":[]},{"given":"A.","family":"Soloshenko","sequence":"additional","affiliation":[]},{"given":"O. 
V.","family":"Solovyanov","sequence":"additional","affiliation":[]},{"given":"V.","family":"Solovyev","sequence":"additional","affiliation":[]},{"given":"P.","family":"Sommer","sequence":"additional","affiliation":[]},{"given":"H.","family":"Son","sequence":"additional","affiliation":[]},{"given":"W.","family":"Song","sequence":"additional","affiliation":[]},{"given":"W. Y.","family":"Song","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sopczak","sequence":"additional","affiliation":[]},{"given":"A. L.","family":"Sopio","sequence":"additional","affiliation":[]},{"given":"F.","family":"Sopkova","sequence":"additional","affiliation":[]},{"given":"C. L.","family":"Sotiropoulou","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sottocornola","sequence":"additional","affiliation":[]},{"given":"R.","family":"Soualah","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Soukharev","sequence":"additional","affiliation":[]},{"given":"D.","family":"South","sequence":"additional","affiliation":[]},{"given":"S.","family":"Spagnolo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Spalla","sequence":"additional","affiliation":[]},{"given":"M.","family":"Spangenberg","sequence":"additional","affiliation":[]},{"given":"F.","family":"Span\u00f2","sequence":"additional","affiliation":[]},{"given":"D.","family":"Sperlich","sequence":"additional","affiliation":[]},{"given":"T. M.","family":"Spieker","sequence":"additional","affiliation":[]},{"given":"G.","family":"Spigo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Spina","sequence":"additional","affiliation":[]},{"given":"D. 
P.","family":"Spiteri","sequence":"additional","affiliation":[]},{"given":"M.","family":"Spousta","sequence":"additional","affiliation":[]},{"given":"A.","family":"Stabile","sequence":"additional","affiliation":[]},{"given":"R.","family":"Stamen","sequence":"additional","affiliation":[]},{"given":"M.","family":"Stamenkovic","sequence":"additional","affiliation":[]},{"given":"E.","family":"Stanecka","sequence":"additional","affiliation":[]},{"given":"B.","family":"Stanislaus","sequence":"additional","affiliation":[]},{"given":"M. M.","family":"Stanitzki","sequence":"additional","affiliation":[]},{"given":"M.","family":"Stankaityte","sequence":"additional","affiliation":[]},{"given":"B.","family":"Stapf","sequence":"additional","affiliation":[]},{"given":"E. A.","family":"Starchenko","sequence":"additional","affiliation":[]},{"given":"G. H.","family":"Stark","sequence":"additional","affiliation":[]},{"given":"J.","family":"Stark","sequence":"additional","affiliation":[]},{"given":"P.","family":"Staroba","sequence":"additional","affiliation":[]},{"given":"P.","family":"Starovoitov","sequence":"additional","affiliation":[]},{"given":"S.","family":"St\u00e4rz","sequence":"additional","affiliation":[]},{"given":"R.","family":"Staszewski","sequence":"additional","affiliation":[]},{"given":"G.","family":"Stavropoulos","sequence":"additional","affiliation":[]},{"given":"M.","family":"Stegler","sequence":"additional","affiliation":[]},{"given":"P.","family":"Steinberg","sequence":"additional","affiliation":[]},{"given":"A. L.","family":"Steinhebel","sequence":"additional","affiliation":[]},{"given":"B.","family":"Stelzer","sequence":"additional","affiliation":[]},{"given":"H. J.","family":"Stelzer","sequence":"additional","affiliation":[]},{"given":"O.","family":"Stelzer-Chilton","sequence":"additional","affiliation":[]},{"given":"H.","family":"Stenzel","sequence":"additional","affiliation":[]},{"given":"T. 
J.","family":"Stevenson","sequence":"additional","affiliation":[]},{"given":"G. A.","family":"Stewart","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Stockton","sequence":"additional","affiliation":[]},{"given":"G.","family":"Stoicea","sequence":"additional","affiliation":[]},{"given":"M.","family":"Stolarski","sequence":"additional","affiliation":[]},{"given":"S.","family":"Stonjek","sequence":"additional","affiliation":[]},{"given":"A.","family":"Straessner","sequence":"additional","affiliation":[]},{"given":"J.","family":"Strandberg","sequence":"additional","affiliation":[]},{"given":"S.","family":"Strandberg","sequence":"additional","affiliation":[]},{"given":"M.","family":"Strauss","sequence":"additional","affiliation":[]},{"given":"P.","family":"Strizenec","sequence":"additional","affiliation":[]},{"given":"R.","family":"Str\u00f6hmer","sequence":"additional","affiliation":[]},{"given":"D. M.","family":"Strom","sequence":"additional","affiliation":[]},{"given":"R.","family":"Stroynowski","sequence":"additional","affiliation":[]},{"given":"A.","family":"Strubig","sequence":"additional","affiliation":[]},{"given":"S. A.","family":"Stucci","sequence":"additional","affiliation":[]},{"given":"B.","family":"Stugu","sequence":"additional","affiliation":[]},{"given":"J.","family":"Stupak","sequence":"additional","affiliation":[]},{"given":"N. A.","family":"Styles","sequence":"additional","affiliation":[]},{"given":"D.","family":"Su","sequence":"additional","affiliation":[]},{"given":"W.","family":"Su","sequence":"additional","affiliation":[]},{"given":"S.","family":"Suchek","sequence":"additional","affiliation":[]},{"given":"V. V.","family":"Sulin","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Sullivan","sequence":"additional","affiliation":[]},{"given":"D. M. 
S.","family":"Sultan","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sultansoy","sequence":"additional","affiliation":[]},{"given":"T.","family":"Sumida","sequence":"additional","affiliation":[]},{"given":"S.","family":"Sun","sequence":"additional","affiliation":[]},{"given":"X.","family":"Sun","sequence":"additional","affiliation":[]},{"given":"K.","family":"Suruliz","sequence":"additional","affiliation":[]},{"given":"C. J. E.","family":"Suster","sequence":"additional","affiliation":[]},{"given":"M. R.","family":"Sutton","sequence":"additional","affiliation":[]},{"given":"S.","family":"Suzuki","sequence":"additional","affiliation":[]},{"given":"M.","family":"Svatos","sequence":"additional","affiliation":[]},{"given":"M.","family":"Swiatlowski","sequence":"additional","affiliation":[]},{"given":"S. P.","family":"Swift","sequence":"additional","affiliation":[]},{"given":"T.","family":"Swirski","sequence":"additional","affiliation":[]},{"given":"A.","family":"Sydorenko","sequence":"additional","affiliation":[]},{"given":"I.","family":"Sykora","sequence":"additional","affiliation":[]},{"given":"M.","family":"Sykora","sequence":"additional","affiliation":[]},{"given":"T.","family":"Sykora","sequence":"additional","affiliation":[]},{"given":"D.","family":"Ta","sequence":"additional","affiliation":[]},{"given":"K.","family":"Tackmann","sequence":"additional","affiliation":[]},{"given":"J.","family":"Taenzer","sequence":"additional","affiliation":[]},{"given":"A.","family":"Taffard","sequence":"additional","affiliation":[]},{"given":"R.","family":"Tafirout","sequence":"additional","affiliation":[]},{"given":"H.","family":"Takai","sequence":"additional","affiliation":[]},{"given":"R.","family":"Takashima","sequence":"additional","affiliation":[]},{"given":"K.","family":"Takeda","sequence":"additional","affiliation":[]},{"given":"T.","family":"Takeshita","sequence":"additional","affiliation":[]},{"given":"E. 
P.","family":"Takeva","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Takubo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Talby","sequence":"additional","affiliation":[]},{"given":"A. A.","family":"Talyshev","sequence":"additional","affiliation":[]},{"given":"N. M.","family":"Tamir","sequence":"additional","affiliation":[]},{"given":"J.","family":"Tanaka","sequence":"additional","affiliation":[]},{"given":"M.","family":"Tanaka","sequence":"additional","affiliation":[]},{"given":"R.","family":"Tanaka","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tapia Araya","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tapprogge","sequence":"additional","affiliation":[]},{"given":"A.","family":"Tarek Abouelfadl Mohamed","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tarem","sequence":"additional","affiliation":[]},{"given":"K.","family":"Tariq","sequence":"additional","affiliation":[]},{"given":"G.","family":"Tarna","sequence":"additional","affiliation":[]},{"given":"G. F.","family":"Tartarelli","sequence":"additional","affiliation":[]},{"given":"P.","family":"Tas","sequence":"additional","affiliation":[]},{"given":"M.","family":"Tasevsky","sequence":"additional","affiliation":[]},{"given":"T.","family":"Tashiro","sequence":"additional","affiliation":[]},{"given":"E.","family":"Tassi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Tavares Delgado","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Tayalati","sequence":"additional","affiliation":[]},{"given":"A. J.","family":"Taylor","sequence":"additional","affiliation":[]},{"given":"G. N.","family":"Taylor","sequence":"additional","affiliation":[]},{"given":"W.","family":"Taylor","sequence":"additional","affiliation":[]},{"given":"A. 
S.","family":"Tee","sequence":"additional","affiliation":[]},{"given":"R.","family":"Teixeira De Lima","sequence":"additional","affiliation":[]},{"given":"P.","family":"Teixeira-Dias","sequence":"additional","affiliation":[]},{"given":"H.","family":"Ten Kate","sequence":"additional","affiliation":[]},{"given":"J. J.","family":"Teoh","sequence":"additional","affiliation":[]},{"given":"S.","family":"Terada","sequence":"additional","affiliation":[]},{"given":"K.","family":"Terashi","sequence":"additional","affiliation":[]},{"given":"J.","family":"Terron","sequence":"additional","affiliation":[]},{"given":"S.","family":"Terzo","sequence":"additional","affiliation":[]},{"given":"M.","family":"Testa","sequence":"additional","affiliation":[]},{"given":"R. J.","family":"Teuscher","sequence":"additional","affiliation":[]},{"given":"S. J.","family":"Thais","sequence":"additional","affiliation":[]},{"given":"T.","family":"Theveneaux-Pelzer","sequence":"additional","affiliation":[]},{"given":"F.","family":"Thiele","sequence":"additional","affiliation":[]},{"given":"D. W.","family":"Thomas","sequence":"additional","affiliation":[]},{"given":"J. O.","family":"Thomas","sequence":"additional","affiliation":[]},{"given":"J. P.","family":"Thomas","sequence":"additional","affiliation":[]},{"given":"P. D.","family":"Thompson","sequence":"additional","affiliation":[]},{"given":"L. A.","family":"Thomsen","sequence":"additional","affiliation":[]},{"given":"E.","family":"Thomson","sequence":"additional","affiliation":[]},{"given":"E. J.","family":"Thorpe","sequence":"additional","affiliation":[]},{"given":"R. E.","family":"Ticse Torres","sequence":"additional","affiliation":[]},{"given":"V.","family":"Tikhomirov","sequence":"additional","affiliation":[]},{"given":"Yu. 
A.","family":"Tikhonov","sequence":"additional","affiliation":[]},{"given":"S.","family":"Timoshenko","sequence":"additional","affiliation":[]},{"given":"P.","family":"Tipton","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tisserant","sequence":"additional","affiliation":[]},{"given":"K.","family":"Todome","sequence":"additional","affiliation":[]},{"given":"S.","family":"Todorova-Nova","sequence":"additional","affiliation":[]},{"given":"S.","family":"Todt","sequence":"additional","affiliation":[]},{"given":"J.","family":"Tojo","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tok\u00e1r","sequence":"additional","affiliation":[]},{"given":"K.","family":"Tokushuku","sequence":"additional","affiliation":[]},{"given":"E.","family":"Tolley","sequence":"additional","affiliation":[]},{"given":"K. G.","family":"Tomiwa","sequence":"additional","affiliation":[]},{"given":"M.","family":"Tomoto","sequence":"additional","affiliation":[]},{"given":"L.","family":"Tompkins","sequence":"additional","affiliation":[]},{"given":"B.","family":"Tong","sequence":"additional","affiliation":[]},{"given":"P.","family":"Tornambe","sequence":"additional","affiliation":[]},{"given":"E.","family":"Torrence","sequence":"additional","affiliation":[]},{"given":"H.","family":"Torres","sequence":"additional","affiliation":[]},{"given":"E.","family":"Torr\u00f3 Pastor","sequence":"additional","affiliation":[]},{"given":"C.","family":"Tosciri","sequence":"additional","affiliation":[]},{"given":"J.","family":"Toth","sequence":"additional","affiliation":[]},{"given":"D. R.","family":"Tovey","sequence":"additional","affiliation":[]},{"given":"A.","family":"Traeet","sequence":"additional","affiliation":[]},{"given":"C. 
J.","family":"Treado","sequence":"additional","affiliation":[]},{"given":"T.","family":"Trefzger","sequence":"additional","affiliation":[]},{"given":"F.","family":"Tresoldi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Tricoli","sequence":"additional","affiliation":[]},{"given":"I. M.","family":"Trigger","sequence":"additional","affiliation":[]},{"given":"S.","family":"Trincaz-Duvoid","sequence":"additional","affiliation":[]},{"given":"D. A.","family":"Trischuk","sequence":"additional","affiliation":[]},{"given":"W.","family":"Trischuk","sequence":"additional","affiliation":[]},{"given":"B.","family":"Trocm\u00e9","sequence":"additional","affiliation":[]},{"given":"A.","family":"Trofymov","sequence":"additional","affiliation":[]},{"given":"C.","family":"Troncon","sequence":"additional","affiliation":[]},{"given":"F.","family":"Trovato","sequence":"additional","affiliation":[]},{"given":"L.","family":"Truong","sequence":"additional","affiliation":[]},{"given":"M.","family":"Trzebinski","sequence":"additional","affiliation":[]},{"given":"A.","family":"Trzupek","sequence":"additional","affiliation":[]},{"given":"F.","family":"Tsai","sequence":"additional","affiliation":[]},{"given":"J. C-L.","family":"Tseng","sequence":"additional","affiliation":[]},{"given":"P. V.","family":"Tsiareshka","sequence":"additional","affiliation":[]},{"given":"A.","family":"Tsirigotis","sequence":"additional","affiliation":[]},{"given":"V.","family":"Tsiskaridze","sequence":"additional","affiliation":[]},{"given":"E. G.","family":"Tskhadadze","sequence":"additional","affiliation":[]},{"given":"M.","family":"Tsopoulou","sequence":"additional","affiliation":[]},{"given":"I. 
I.","family":"Tsukerman","sequence":"additional","affiliation":[]},{"given":"V.","family":"Tsulaia","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tsuno","sequence":"additional","affiliation":[]},{"given":"D.","family":"Tsybychev","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Tu","sequence":"additional","affiliation":[]},{"given":"A.","family":"Tudorache","sequence":"additional","affiliation":[]},{"given":"V.","family":"Tudorache","sequence":"additional","affiliation":[]},{"given":"T. T.","family":"Tulbure","sequence":"additional","affiliation":[]},{"given":"A. N.","family":"Tuna","sequence":"additional","affiliation":[]},{"given":"S.","family":"Turchikhin","sequence":"additional","affiliation":[]},{"given":"D.","family":"Turgeman","sequence":"additional","affiliation":[]},{"given":"I. Turk","family":"Cakir","sequence":"additional","affiliation":[]},{"given":"R. J.","family":"Turner","sequence":"additional","affiliation":[]},{"given":"R.","family":"Turra","sequence":"additional","affiliation":[]},{"given":"P. M.","family":"Tuts","sequence":"additional","affiliation":[]},{"given":"S.","family":"Tzamarias","sequence":"additional","affiliation":[]},{"given":"E.","family":"Tzovara","sequence":"additional","affiliation":[]},{"given":"G.","family":"Ucchielli","sequence":"additional","affiliation":[]},{"given":"K.","family":"Uchida","sequence":"additional","affiliation":[]},{"given":"F.","family":"Ukegawa","sequence":"additional","affiliation":[]},{"given":"G.","family":"Unal","sequence":"additional","affiliation":[]},{"given":"A.","family":"Undrus","sequence":"additional","affiliation":[]},{"given":"G.","family":"Unel","sequence":"additional","affiliation":[]},{"given":"F. 
C.","family":"Ungaro","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Unno","sequence":"additional","affiliation":[]},{"given":"K.","family":"Uno","sequence":"additional","affiliation":[]},{"given":"J.","family":"Urban","sequence":"additional","affiliation":[]},{"given":"P.","family":"Urquijo","sequence":"additional","affiliation":[]},{"given":"G.","family":"Usai","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Uysal","sequence":"additional","affiliation":[]},{"given":"V.","family":"Vacek","sequence":"additional","affiliation":[]},{"given":"B.","family":"Vachon","sequence":"additional","affiliation":[]},{"given":"K. O. H.","family":"Vadla","sequence":"additional","affiliation":[]},{"given":"A.","family":"Vaidya","sequence":"additional","affiliation":[]},{"given":"C.","family":"Valderanis","sequence":"additional","affiliation":[]},{"given":"E.","family":"Valdes Santurio","sequence":"additional","affiliation":[]},{"given":"M.","family":"Valente","sequence":"additional","affiliation":[]},{"given":"S.","family":"Valentinetti","sequence":"additional","affiliation":[]},{"given":"A.","family":"Valero","sequence":"additional","affiliation":[]},{"given":"L.","family":"Val\u00e9ry","sequence":"additional","affiliation":[]},{"given":"R. A.","family":"Vallance","sequence":"additional","affiliation":[]},{"given":"A.","family":"Vallier","sequence":"additional","affiliation":[]},{"given":"J. A.","family":"Valls Ferrer","sequence":"additional","affiliation":[]},{"given":"T. R.","family":"Van Daalen","sequence":"additional","affiliation":[]},{"given":"P.","family":"Van Gemmeren","sequence":"additional","affiliation":[]},{"given":"I.","family":"Van Vulpen","sequence":"additional","affiliation":[]},{"given":"M.","family":"Vanadia","sequence":"additional","affiliation":[]},{"given":"W.","family":"Vandelli","sequence":"additional","affiliation":[]},{"given":"M.","family":"Vandenbroucke","sequence":"additional","affiliation":[]},{"given":"E. 
R.","family":"Vandewall","sequence":"additional","affiliation":[]},{"given":"A.","family":"Vaniachine","sequence":"additional","affiliation":[]},{"given":"D.","family":"Vannicola","sequence":"additional","affiliation":[]},{"given":"R.","family":"Vari","sequence":"additional","affiliation":[]},{"given":"E. W.","family":"Varnes","sequence":"additional","affiliation":[]},{"given":"C.","family":"Varni","sequence":"additional","affiliation":[]},{"given":"T.","family":"Varol","sequence":"additional","affiliation":[]},{"given":"D.","family":"Varouchas","sequence":"additional","affiliation":[]},{"given":"K. E.","family":"Varvell","sequence":"additional","affiliation":[]},{"given":"M. E.","family":"Vasile","sequence":"additional","affiliation":[]},{"given":"G. A.","family":"Vasquez","sequence":"additional","affiliation":[]},{"given":"F.","family":"Vazeille","sequence":"additional","affiliation":[]},{"given":"D.","family":"Vazquez Furelos","sequence":"additional","affiliation":[]},{"given":"T.","family":"Vazquez Schroeder","sequence":"additional","affiliation":[]},{"given":"J.","family":"Veatch","sequence":"additional","affiliation":[]},{"given":"V.","family":"Vecchio","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"Veen","sequence":"additional","affiliation":[]},{"given":"L. M.","family":"Veloce","sequence":"additional","affiliation":[]},{"given":"F.","family":"Veloso","sequence":"additional","affiliation":[]},{"given":"S.","family":"Veneziano","sequence":"additional","affiliation":[]},{"given":"A.","family":"Ventura","sequence":"additional","affiliation":[]},{"given":"N.","family":"Venturi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Verbytskyi","sequence":"additional","affiliation":[]},{"given":"V.","family":"Vercesi","sequence":"additional","affiliation":[]},{"given":"M.","family":"Verducci","sequence":"additional","affiliation":[]},{"given":"C. 
M.","family":"Vergel Infante","sequence":"additional","affiliation":[]},{"given":"C.","family":"Vergis","sequence":"additional","affiliation":[]},{"given":"W.","family":"Verkerke","sequence":"additional","affiliation":[]},{"given":"A. T.","family":"Vermeulen","sequence":"additional","affiliation":[]},{"given":"J. C.","family":"Vermeulen","sequence":"additional","affiliation":[]},{"given":"M. C.","family":"Vetterli","sequence":"additional","affiliation":[]},{"given":"N.","family":"Viaux Maira","sequence":"additional","affiliation":[]},{"given":"M. Vicente","family":"Barreto Pinto","sequence":"additional","affiliation":[]},{"given":"T.","family":"Vickey","sequence":"additional","affiliation":[]},{"given":"O. E. Vickey","family":"Boeriu","sequence":"additional","affiliation":[]},{"given":"G. H. A.","family":"Viehhauser","sequence":"additional","affiliation":[]},{"given":"L.","family":"Vigani","sequence":"additional","affiliation":[]},{"given":"M.","family":"Villa","sequence":"additional","affiliation":[]},{"given":"M.","family":"Villaplana Perez","sequence":"additional","affiliation":[]},{"given":"E.","family":"Vilucchi","sequence":"additional","affiliation":[]},{"given":"M. G.","family":"Vincter","sequence":"additional","affiliation":[]},{"given":"G. S.","family":"Virdee","sequence":"additional","affiliation":[]},{"given":"A.","family":"Vishwakarma","sequence":"additional","affiliation":[]},{"given":"C.","family":"Vittori","sequence":"additional","affiliation":[]},{"given":"I.","family":"Vivarelli","sequence":"additional","affiliation":[]},{"given":"M.","family":"Vogel","sequence":"additional","affiliation":[]},{"given":"P.","family":"Vokac","sequence":"additional","affiliation":[]},{"given":"S. 
E.","family":"von Buddenbrock","sequence":"additional","affiliation":[]},{"given":"E.","family":"Von Toerne","sequence":"additional","affiliation":[]},{"given":"V.","family":"Vorobel","sequence":"additional","affiliation":[]},{"given":"K.","family":"Vorobev","sequence":"additional","affiliation":[]},{"given":"M.","family":"Vos","sequence":"additional","affiliation":[]},{"given":"J. H.","family":"Vossebeld","sequence":"additional","affiliation":[]},{"given":"M.","family":"Vozak","sequence":"additional","affiliation":[]},{"given":"N.","family":"Vranjes","sequence":"additional","affiliation":[]},{"given":"M. Vranjes","family":"Milosavljevic","sequence":"additional","affiliation":[]},{"given":"V.","family":"Vrba","sequence":"additional","affiliation":[]},{"given":"M.","family":"Vreeswijk","sequence":"additional","affiliation":[]},{"given":"R.","family":"Vuillermet","sequence":"additional","affiliation":[]},{"given":"I.","family":"Vukotic","sequence":"additional","affiliation":[]},{"given":"P.","family":"Wagner","sequence":"additional","affiliation":[]},{"given":"W.","family":"Wagner","sequence":"additional","affiliation":[]},{"given":"J.","family":"Wagner-Kuhr","sequence":"additional","affiliation":[]},{"given":"S.","family":"Wahdan","sequence":"additional","affiliation":[]},{"given":"H.","family":"Wahlberg","sequence":"additional","affiliation":[]},{"given":"V. M.","family":"Walbrecht","sequence":"additional","affiliation":[]},{"given":"J.","family":"Walder","sequence":"additional","affiliation":[]},{"given":"R.","family":"Walker","sequence":"additional","affiliation":[]},{"given":"S. D.","family":"Walker","sequence":"additional","affiliation":[]},{"given":"W.","family":"Walkowiak","sequence":"additional","affiliation":[]},{"given":"V.","family":"Wallangen","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"A. 
Z.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"C.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"F.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"H.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"H.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"J.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"J.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"P.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Q.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"R.-J.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"R.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"R.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"S. M.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"W. T.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"W. X.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Wang","sequence":"additional","affiliation":[]},{"given":"C.","family":"Wanotayaroj","sequence":"additional","affiliation":[]},{"given":"A.","family":"Warburton","sequence":"additional","affiliation":[]},{"given":"C. P.","family":"Ward","sequence":"additional","affiliation":[]},{"given":"D. R.","family":"Wardrope","sequence":"additional","affiliation":[]},{"given":"N.","family":"Warrack","sequence":"additional","affiliation":[]},{"given":"A.","family":"Washbrook","sequence":"additional","affiliation":[]},{"given":"A. T.","family":"Watson","sequence":"additional","affiliation":[]},{"given":"M. F.","family":"Watson","sequence":"additional","affiliation":[]},{"given":"G.","family":"Watts","sequence":"additional","affiliation":[]},{"given":"B. 
M.","family":"Waugh","sequence":"additional","affiliation":[]},{"given":"A. F.","family":"Webb","sequence":"additional","affiliation":[]},{"given":"C.","family":"Weber","sequence":"additional","affiliation":[]},{"given":"M. S.","family":"Weber","sequence":"additional","affiliation":[]},{"given":"S. A.","family":"Weber","sequence":"additional","affiliation":[]},{"given":"S. M.","family":"Weber","sequence":"additional","affiliation":[]},{"given":"A. R.","family":"Weidberg","sequence":"additional","affiliation":[]},{"given":"J.","family":"Weingarten","sequence":"additional","affiliation":[]},{"given":"M.","family":"Weirich","sequence":"additional","affiliation":[]},{"given":"C.","family":"Weiser","sequence":"additional","affiliation":[]},{"given":"P. S.","family":"Wells","sequence":"additional","affiliation":[]},{"given":"T.","family":"Wenaus","sequence":"additional","affiliation":[]},{"given":"T.","family":"Wengler","sequence":"additional","affiliation":[]},{"given":"S.","family":"Wenig","sequence":"additional","affiliation":[]},{"given":"N.","family":"Wermes","sequence":"additional","affiliation":[]},{"given":"M. D.","family":"Werner","sequence":"additional","affiliation":[]},{"given":"M.","family":"Wessels","sequence":"additional","affiliation":[]},{"given":"T. D.","family":"Weston","sequence":"additional","affiliation":[]},{"given":"K.","family":"Whalen","sequence":"additional","affiliation":[]},{"given":"N. L.","family":"Whallon","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Wharton","sequence":"additional","affiliation":[]},{"given":"A. S.","family":"White","sequence":"additional","affiliation":[]},{"given":"A.","family":"White","sequence":"additional","affiliation":[]},{"given":"M. J.","family":"White","sequence":"additional","affiliation":[]},{"given":"D.","family":"Whiteson","sequence":"additional","affiliation":[]},{"given":"B. 
W.","family":"Whitmore","sequence":"additional","affiliation":[]},{"given":"W.","family":"Wiedenmann","sequence":"additional","affiliation":[]},{"given":"C.","family":"Wiel","sequence":"additional","affiliation":[]},{"given":"M.","family":"Wielers","sequence":"additional","affiliation":[]},{"given":"N.","family":"Wieseotte","sequence":"additional","affiliation":[]},{"given":"C.","family":"Wiglesworth","sequence":"additional","affiliation":[]},{"given":"L. A. M.","family":"Wiik-Fuchs","sequence":"additional","affiliation":[]},{"given":"H. G.","family":"Wilkens","sequence":"additional","affiliation":[]},{"given":"L. J.","family":"Wilkins","sequence":"additional","affiliation":[]},{"given":"H. H.","family":"Williams","sequence":"additional","affiliation":[]},{"given":"S.","family":"Williams","sequence":"additional","affiliation":[]},{"given":"C.","family":"Willis","sequence":"additional","affiliation":[]},{"given":"S.","family":"Willocq","sequence":"additional","affiliation":[]},{"given":"I.","family":"Wingerter-Seez","sequence":"additional","affiliation":[]},{"given":"E.","family":"Winkels","sequence":"additional","affiliation":[]},{"given":"F.","family":"Winklmeier","sequence":"additional","affiliation":[]},{"given":"O. J.","family":"Winston","sequence":"additional","affiliation":[]},{"given":"B. T.","family":"Winter","sequence":"additional","affiliation":[]},{"given":"M.","family":"Wittgen","sequence":"additional","affiliation":[]},{"given":"M.","family":"Wobisch","sequence":"additional","affiliation":[]},{"given":"A.","family":"Wolf","sequence":"additional","affiliation":[]},{"given":"T. M. H.","family":"Wolf","sequence":"additional","affiliation":[]},{"given":"R.","family":"Wolff","sequence":"additional","affiliation":[]},{"given":"R.","family":"W\u00f6lker","sequence":"additional","affiliation":[]},{"given":"J.","family":"Wollrath","sequence":"additional","affiliation":[]},{"given":"M. 
W.","family":"Wolter","sequence":"additional","affiliation":[]},{"given":"H.","family":"Wolters","sequence":"additional","affiliation":[]},{"given":"V. W. S.","family":"Wong","sequence":"additional","affiliation":[]},{"given":"N. L.","family":"Woods","sequence":"additional","affiliation":[]},{"given":"S. D.","family":"Worm","sequence":"additional","affiliation":[]},{"given":"B. K.","family":"Wosiek","sequence":"additional","affiliation":[]},{"given":"K. W.","family":"Wo\u017aniak","sequence":"additional","affiliation":[]},{"given":"K.","family":"Wraight","sequence":"additional","affiliation":[]},{"given":"S. L.","family":"Wu","sequence":"additional","affiliation":[]},{"given":"X.","family":"Wu","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Wu","sequence":"additional","affiliation":[]},{"given":"T. R.","family":"Wyatt","sequence":"additional","affiliation":[]},{"given":"B. M.","family":"Wynne","sequence":"additional","affiliation":[]},{"given":"S.","family":"Xella","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Xi","sequence":"additional","affiliation":[]},{"given":"X.","family":"Xiao","sequence":"additional","affiliation":[]},{"given":"I.","family":"Xiotidis","sequence":"additional","affiliation":[]},{"given":"D.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"H.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"L.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"T.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"W.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Xu","sequence":"additional","affiliation":[]},{"given":"B.","family":"Yabsley","sequence":"additional","affiliation":[]},{"given":"S.","family":"Yacoob","sequence":"additional","affiliation":[]},{"given":"K.","family":"Yajima","sequence":"additional","affiliation":[]},{"given":"D. 
P.","family":"Yallup","sequence":"additional","affiliation":[]},{"given":"N.","family":"Yamaguchi","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Yamaguchi","sequence":"additional","affiliation":[]},{"given":"A.","family":"Yamamoto","sequence":"additional","affiliation":[]},{"given":"M.","family":"Yamatani","sequence":"additional","affiliation":[]},{"given":"T.","family":"Yamazaki","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Yamazaki","sequence":"additional","affiliation":[]},{"given":"J.","family":"Yan","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Yan","sequence":"additional","affiliation":[]},{"given":"H. J.","family":"Yang","sequence":"additional","affiliation":[]},{"given":"H. T.","family":"Yang","sequence":"additional","affiliation":[]},{"given":"S.","family":"Yang","sequence":"additional","affiliation":[]},{"given":"T.","family":"Yang","sequence":"additional","affiliation":[]},{"given":"X.","family":"Yang","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Yang","sequence":"additional","affiliation":[]},{"given":"W-M.","family":"Yao","sequence":"additional","affiliation":[]},{"given":"Y. C.","family":"Yap","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Yasu","sequence":"additional","affiliation":[]},{"given":"E.","family":"Yatsenko","sequence":"additional","affiliation":[]},{"given":"H.","family":"Ye","sequence":"additional","affiliation":[]},{"given":"J.","family":"Ye","sequence":"additional","affiliation":[]},{"given":"S.","family":"Ye","sequence":"additional","affiliation":[]},{"given":"I.","family":"Yeletskikh","sequence":"additional","affiliation":[]},{"given":"M. 
R.","family":"Yexley","sequence":"additional","affiliation":[]},{"given":"E.","family":"Yigitbasi","sequence":"additional","affiliation":[]},{"given":"K.","family":"Yorita","sequence":"additional","affiliation":[]},{"given":"K.","family":"Yoshihara","sequence":"additional","affiliation":[]},{"given":"C. J. S.","family":"Young","sequence":"additional","affiliation":[]},{"given":"C.","family":"Young","sequence":"additional","affiliation":[]},{"given":"J.","family":"Yu","sequence":"additional","affiliation":[]},{"given":"R.","family":"Yuan","sequence":"additional","affiliation":[]},{"given":"X.","family":"Yue","sequence":"additional","affiliation":[]},{"given":"M.","family":"Zaazoua","sequence":"additional","affiliation":[]},{"given":"B.","family":"Zabinski","sequence":"additional","affiliation":[]},{"given":"G.","family":"Zacharis","sequence":"additional","affiliation":[]},{"given":"E.","family":"Zaffaroni","sequence":"additional","affiliation":[]},{"given":"A. M.","family":"Zaitsev","sequence":"additional","affiliation":[]},{"given":"T.","family":"Zakareishvili","sequence":"additional","affiliation":[]},{"given":"N.","family":"Zakharchuk","sequence":"additional","affiliation":[]},{"given":"S.","family":"Zambito","sequence":"additional","affiliation":[]},{"given":"D.","family":"Zanzi","sequence":"additional","affiliation":[]},{"given":"D. R.","family":"Zaripovas","sequence":"additional","affiliation":[]},{"given":"S. V.","family":"Zei\u00dfner","sequence":"additional","affiliation":[]},{"given":"C.","family":"Zeitnitz","sequence":"additional","affiliation":[]},{"given":"G.","family":"Zemaityte","sequence":"additional","affiliation":[]},{"given":"J. 
C.","family":"Zeng","sequence":"additional","affiliation":[]},{"given":"O.","family":"Zenin","sequence":"additional","affiliation":[]},{"given":"T.","family":"\u017deni\u0161","sequence":"additional","affiliation":[]},{"given":"D.","family":"Zerwas","sequence":"additional","affiliation":[]},{"given":"M.","family":"Zgubi\u010d","sequence":"additional","affiliation":[]},{"given":"B.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"D. F.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"G.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"H.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"J.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"L.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"L.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"M.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"R.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"S.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"X.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"X.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Zhang","sequence":"additional","affiliation":[]},{"given":"P.","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Zhao","sequence":"additional","affiliation":[]},{"given":"A.","family":"Zhemchugov","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Zheng","sequence":"additional","affiliation":[]},{"given":"D.","family":"Zhong","sequence":"additional","affiliation":[]},{"given":"B.","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"C.","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"M. 
S.","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"M.","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"N.","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Zhou","sequence":"additional","affiliation":[]},{"given":"C. G.","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"C.","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"H. L.","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"H.","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"J.","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"Y.","family":"Zhu","sequence":"additional","affiliation":[]},{"given":"X.","family":"Zhuang","sequence":"additional","affiliation":[]},{"given":"K.","family":"Zhukov","sequence":"additional","affiliation":[]},{"given":"V.","family":"Zhulanov","sequence":"additional","affiliation":[]},{"given":"D.","family":"Zieminska","sequence":"additional","affiliation":[]},{"given":"N. I.","family":"Zimine","sequence":"additional","affiliation":[]},{"given":"S.","family":"Zimmermann","sequence":"additional","affiliation":[]},{"given":"Z.","family":"Zinonos","sequence":"additional","affiliation":[]},{"given":"M.","family":"Ziolkowski","sequence":"additional","affiliation":[]},{"given":"L.","family":"\u017divkovi\u0107","sequence":"additional","affiliation":[]},{"given":"G.","family":"Zobernig","sequence":"additional","affiliation":[]},{"given":"A.","family":"Zoccoli","sequence":"additional","affiliation":[]},{"given":"K.","family":"Zoch","sequence":"additional","affiliation":[]},{"given":"T. 
G.","family":"Zorbas","sequence":"additional","affiliation":[]},{"given":"R.","family":"Zou","sequence":"additional","affiliation":[]},{"given":"L.","family":"Zwalinski","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2023,2,9]]},"reference":[{"key":"1757_CR1","doi-asserted-by":"publisher","first-page":"1","DOI":"10.1016\/j.physletb.2012.08.020","volume":"716","author":"ATLAS Collaboration.","year":"2012","unstructured":"ATLAS Collaboration. Observation of a new particle in the search for the Standard Model Higgs boson with the ATLAS detector at the LHC. Phys. Lett. B 716, 1\u201329 (2012).","journal-title":"Phys. Lett. B"},{"key":"1757_CR2","doi-asserted-by":"publisher","first-page":"30","DOI":"10.1016\/j.physletb.2012.08.021","volume":"716","author":"CMS Collaboration.","year":"2012","unstructured":"CMS Collaboration. Observation of a new boson at a mass of 125\u2009GeV with the CMS experiment at the LHC. Phys. Lett. B 716, 30\u201361 (2012).","journal-title":"Phys. Lett. B"},{"key":"1757_CR3","doi-asserted-by":"publisher","first-page":"883","DOI":"10.1103\/PhysRevLett.38.883","volume":"38","author":"BW Lee","year":"1977","unstructured":"Lee, B. W., Quigg, C. & Thacker, H. B. Strength of weak interactions at very high energies and the Higgs boson mass. Phys. Rev. Lett. 38, 883\u2013885 (1977).","journal-title":"Phys. Rev. Lett."},{"key":"1757_CR4","doi-asserted-by":"publisher","first-page":"379","DOI":"10.1016\/0550-3213(85)90580-2","volume":"261","author":"MS Chanowitz","year":"1985","unstructured":"Chanowitz, M. S. & Gaillard, M. K. The TeV physics of strongly interacting W\u2019s and Z\u2019s. Nucl. Phys. B 261, 379\u2013431 (1985).","journal-title":"Nucl. Phys. B"},{"key":"1757_CR5","unstructured":"Szleper, M. The Higgs boson and the physics of WW scattering before and after Higgs discovery. 
Preprint at https:\/\/arxiv.org\/abs\/1412.8367 (2014)."},{"key":"1757_CR6","doi-asserted-by":"publisher","first-page":"081","DOI":"10.1007\/JHEP10(2011)081","volume":"10","author":"R Contino","year":"2011","unstructured":"Contino, R., Pappadopulo, D., Marzocca, D. & Rattazzi, R. On the effect of resonances in composite Higgs phenomenology. J. High Energy Phys. 10, 081 (2011).","journal-title":"J. High Energy Phys."},{"key":"1757_CR7","doi-asserted-by":"publisher","first-page":"034","DOI":"10.1088\/1126-6708\/2002\/07\/034","volume":"07","author":"N Arkani-Hamed","year":"2002","unstructured":"Arkani-Hamed, N., Cohen, A. G., Katz, E. & Nelson, A. E. The littlest Higgs. J. High Energy Phys. 07, 034 (2002).","journal-title":"J. High Energy Phys."},{"key":"1757_CR8","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-013-2704-3","volume":"74","author":"A Djouadi","year":"2014","unstructured":"Djouadi, A. Implications of the Higgs discovery for the MSSM. Eur. Phys. J. C 74, 2704 (2014).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR9","doi-asserted-by":"publisher","first-page":"073005","DOI":"10.1103\/PhysRevD.74.073005","volume":"74","author":"OJP Eboli","year":"2006","unstructured":"Eboli, O. J. P., Gonzalez-Garcia, M. C. & Mizukoshi, J. K. pp\u2009\u2192\u2009jje\u00b1\u03bc\u00b1\u03bd\u03bd and jje\u00b1\u03bc\u2213\u03bd\u03bd at $$O({\\alpha }_{em}^{6})$$ and $$O({\\alpha }_{em}^{6})$$ for the study of the quartic electroweak gauge boson vertex at CERN LHC. Phys. Rev. D 74, 073005 (2006).","journal-title":"Phys. Rev. D"},{"key":"1757_CR10","doi-asserted-by":"publisher","first-page":"39","DOI":"10.1007\/JHEP05(2022)039","volume":"2022","author":"R Bellan","year":"2022","unstructured":"Bellan, R. et al. A sensitivity study of VBS and diboson WW to dimension-6 EFT operators at the LHC. J. High Energy Phys. 2022, 39 (2022).","journal-title":"J. 
High Energy Phys."},{"key":"1757_CR11","doi-asserted-by":"publisher","first-page":"389","DOI":"10.1140\/epjc\/s10052-019-6893-2","volume":"79","author":"R Gomez-Ambrosio","year":"2019","unstructured":"Gomez-Ambrosio, R. Studies of dimension-six EFT effects in vector boson scattering. Eur. Phys. J. C 79, 389 (2019).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR12","doi-asserted-by":"publisher","first-page":"161801","DOI":"10.1103\/PhysRevLett.123.161801","volume":"123","author":"ATLAS Collaboration.","year":"2019","unstructured":"ATLAS Collaboration. Observation of electroweak production of a same-sign W boson pair in association with two jets in pp collisions at $$\\sqrt{s}=13$$ TeV with the ATLAS detector. Phys. Rev. Lett. 123, 161801 (2019).","journal-title":"Phys. Rev. Lett."},{"key":"1757_CR13","doi-asserted-by":"publisher","first-page":"469","DOI":"10.1016\/j.physletb.2019.05.012","volume":"793","author":"ATLAS Collaboration.","year":"2019","unstructured":"ATLAS Collaboration. Observation of electroweak W\u00b1Z boson pair production in association with two jets in pp collisions at $$\\sqrt{s}=$$ 13 TeV with the ATLAS detector. Phys. Lett. B 793, 469\u2013492 (2019).","journal-title":"Phys. Lett. B"},{"key":"1757_CR14","doi-asserted-by":"publisher","first-page":"081801","DOI":"10.1103\/PhysRevLett.120.081801","volume":"120","author":"CMS Collaboration.","year":"2018","unstructured":"CMS Collaboration. Observation of electroweak production of same-sign W boson pairs in the two jet and two same-sign lepton final state in proton\u2013proton collisions at $$\\sqrt{s}=$$ 13 TeV. Phys. Rev. Lett. 120, 081801 (2018).","journal-title":"Phys. Rev. Lett."},{"key":"1757_CR15","unstructured":"CMS Collaboration. Measurements of production cross sections of WZ and same-sign WW boson pairs in association with two jets in proton\u2013proton collisions at $$\\sqrt{s}=$$ 13 TeV. Phys. Lett. 
B 809, 135710 (2020)."},{"key":"1757_CR16","doi-asserted-by":"publisher","first-page":"135992","DOI":"10.1016\/j.physletb.2020.135992","volume":"812","author":"CMS Collaboration.","year":"2021","unstructured":"CMS Collaboration. Evidence for electroweak production of four charged leptons and two jets in proton\u2013proton collisions at $$\\sqrt{s}=13\\,{{{\\rm{TeV}}}}$$. Phys. Lett. B 812, 135992 (2021).","journal-title":"Phys. Lett. B"},{"key":"1757_CR17","doi-asserted-by":"publisher","first-page":"053003","DOI":"10.1103\/PhysRevD.90.053003","volume":"90","author":"C Englert","year":"2014","unstructured":"Englert, C. & Spannowsky, M. Limitations and opportunities of off-shell coupling measurements. Phys. Rev. D 90, 053003 (2014).","journal-title":"Phys. Rev. D"},{"key":"1757_CR18","unstructured":"ATLAS Collaboration. The ATLAS experiment at the CERN Large Hadron Collider. J. Instrum. 3, S08003 (2008)."},{"key":"1757_CR19","unstructured":"ATLAS Collaboration. ATLAS insertable B-layer technical design report, ATLAS-TDR-19. CERN https:\/\/cds.cern.ch\/record\/1291633 (2010)."},{"key":"1757_CR20","doi-asserted-by":"publisher","first-page":"T05008","DOI":"10.1088\/1748-0221\/13\/05\/T05008","volume":"13","author":"B Abbott","year":"2018","unstructured":"Abbott, B. et al. Production and integration of the ATLAS Insertable B-Layer. J. Instrum. 13, T05008 (2018).","journal-title":"J. Instrum."},{"key":"1757_CR21","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-017-4852-3","volume":"77","author":"ATLAS Collaboration.","year":"2017","unstructured":"ATLAS Collaboration. Performance of the ATLAS trigger system in 2015. Eur. Phys. J. C 77, 317 (2017).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR22","doi-asserted-by":"publisher","first-page":"141","DOI":"10.1007\/JHEP03(2014)141","volume":"2014","author":"B J\u00e4ger","year":"2014","unstructured":"J\u00e4ger, B., Karlberg, A. & Zanderighi, G. 
Electroweak ZZjj production in the Standard Model and beyond in the POWHEG-BOX V2. J. High Energy Phys. 2014, 141 (2014).","journal-title":"J. High Energy Phys."},{"key":"1757_CR23","doi-asserted-by":"publisher","first-page":"040","DOI":"10.1007\/JHEP04(2015)040","volume":"04","author":"RD Ball","year":"2015","unstructured":"Ball, R. D. et al. Parton distributions for the LHC run II. J. High Energy Phys. 04, 040 (2015).","journal-title":"J. High Energy Phys."},{"key":"1757_CR24","doi-asserted-by":"publisher","first-page":"079","DOI":"10.1007\/JHEP07(2014)079","volume":"07","author":"J Alwall","year":"2014","unstructured":"Alwall, J. et al. The automated computation of tree-level and next-to-leading order differential cross sections, and their matching to parton shower simulations. J. High Energy Phys. 07, 079 (2014).","journal-title":"J. High Energy Phys."},{"key":"1757_CR25","doi-asserted-by":"publisher","first-page":"007","DOI":"10.1088\/1126-6708\/2009\/02\/007","volume":"02","author":"T Gleisberg","year":"2009","unstructured":"Gleisberg, T. et al. Event generation with SHERPA 1.1. J. High Energy Phys. 02, 007 (2009).","journal-title":"J. High Energy Phys."},{"key":"1757_CR26","doi-asserted-by":"publisher","first-page":"082","DOI":"10.1007\/JHEP12(2013)082","volume":"12","author":"N Kauer","year":"2013","unstructured":"Kauer, N. Interference effects for H\u2009\u2192\u2009WW\/ZZ $$\\to \\ell {\\overline{\\nu }}_{\\ell }\\overline{\\ell }{\\nu }_{\\ell }$$ searches in gluon fusion at the LHC. J. High Energy Phys. 12, 082, (2013).","journal-title":"J. High Energy Phys."},{"key":"1757_CR27","doi-asserted-by":"publisher","first-page":"033009","DOI":"10.1103\/PhysRevD.89.033009","volume":"89","author":"J Gao","year":"2014","unstructured":"Gao, J. et al. CT10 next-to-next-to-leading order global analysis of QCD. Phys. Rev. D 89, 033009 (2014).","journal-title":"Phys. Rev. 
D"},{"key":"1757_CR28","doi-asserted-by":"publisher","first-page":"126","DOI":"10.1088\/1126-6708\/2007\/09\/126","volume":"09","author":"S Frixione","year":"2007","unstructured":"Frixione, S., Ridolfi, G. & Nason, P. A positive-weight next-to-leading-order Monte Carlo for heavy flavour hadroproduction. J. High Energy Phys. 09, 126 (2007).","journal-title":"J. High Energy Phys."},{"key":"1757_CR29","doi-asserted-by":"publisher","first-page":"111","DOI":"10.1088\/1126-6708\/2009\/09\/111","volume":"09","author":"S Alioli","year":"2009","unstructured":"Alioli, S., Nason, P., Oleari, C. & Re, E. NLO single-top production matched with shower in POWHEG: s- and t-channel contributions. J. High Energy Phys. 09, 111 (2009).","journal-title":"J. High Energy Phys."},{"key":"1757_CR30","doi-asserted-by":"publisher","first-page":"130","DOI":"10.1007\/JHEP09(2012)130","volume":"09","author":"R Frederix","year":"2012","unstructured":"Frederix, R., Re, E. & Torrielli, P. Single-top t-channel hadroproduction in the four-flavour scheme with POWHEG and aMC@NLO. J. High Energy Phys. 09, 130 (2012).","journal-title":"J. High Energy Phys."},{"key":"1757_CR31","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-011-1547-z","volume":"71","author":"E Re","year":"2011","unstructured":"Re, E. Single-top Wt-channel production matched with parton showers using the POWHEG method. Eur. Phys. J. C 71, 1547 (2011).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR32","doi-asserted-by":"publisher","first-page":"852","DOI":"10.1016\/j.cpc.2008.01.036","volume":"178","author":"T Sj\u00f6strand","year":"2008","unstructured":"Sj\u00f6strand, T., Mrenna, S. & Skands, P. Z. A brief introduction to PYTHIA 8.1. Comput. Phys. Commun. 178, 852\u2013867 (2008).","journal-title":"Comput. Phys. Commun."},{"key":"1757_CR33","doi-asserted-by":"publisher","first-page":"244","DOI":"10.1016\/j.nuclphysb.2012.10.003","volume":"867","author":"RD Ball","year":"2013","unstructured":"Ball, R. D. et al. 
Parton distributions with LHC data. Nucl. Phys. B 867, 244\u2013289 (2013).","journal-title":"Nucl. Phys. B"},{"key":"1757_CR34","unstructured":"ATLAS Collaboration. ATLAS Pythia 8 tunes to 7\u2009TeV data, ATL-PHYS-PUB-2014-021 CERN https:\/\/cds.cern.ch\/record\/1966419 (2014)."},{"key":"1757_CR35","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-010-1429-9","volume":"70","author":"ATLAS Collaboration.","year":"2010","unstructured":"ATLAS Collaboration. The ATLAS simulation infrastructure. Eur. Phys. J. C 70, 823 (2010).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR36","doi-asserted-by":"publisher","first-page":"250","DOI":"10.1016\/S0168-9002(03)01368-8","volume":"506","author":"S Agostinelli","year":"2003","unstructured":"Agostinelli, S. et al. Geant4\u2014a simulation toolkit. Nucl. Instrum. Methods A 506, 250\u2013303 (2003).","journal-title":"Nucl. Instrum. Methods A"},{"key":"1757_CR37","doi-asserted-by":"publisher","first-page":"3","DOI":"10.1007\/s41781-021-00062-2","volume":"6","author":"G Aad","year":"2022","unstructured":"Aad, G. et al. Emulating the impact of additional proton-proton interactions in the ATLAS simulation by presampling sets of inelastic Monte Carlo events. Comput. Softw. Big Sci. 6, 3 (2022).","journal-title":"Comput. Softw. Big Sci."},{"key":"1757_CR38","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-016-4120-y","volume":"76","author":"ATLAS Collaboration.","year":"2016","unstructured":"ATLAS Collaboration. Muon reconstruction performance of the ATLAS detector in proton\u2013proton collision data at $$\\sqrt{s}=13\\,{{{\\rm{TeV}}}}$$. Eur. Phys. J. C 76, 292 (2016).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR39","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-019-7140-6","volume":"79","author":"ATLAS Collaboration.","year":"2019","unstructured":"ATLAS Collaboration. 
Electron reconstruction and identification in the ATLAS experiment using the 2015 and 2016 LHC proton\u2013proton collision data at$$\\sqrt{s}$$ = 13\u2009TeV. Eur. Phys. J. C 79, 639 (2019).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR40","doi-asserted-by":"publisher","first-page":"063","DOI":"10.1088\/1126-6708\/2008\/04\/063","volume":"04","author":"M Cacciari","year":"2008","unstructured":"Cacciari, M., Salam, G. P. & Soyez, G. The anti-kt jet clustering algorithm. J. High Energy Phys. 04, 063 (2008).","journal-title":"J. High Energy Phys."},{"key":"1757_CR41","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-012-1896-2","volume":"72","author":"M Cacciari","year":"2012","unstructured":"Cacciari, M., Salam, G. P. & Soyez, G. FastJet user manual. Eur. Phys. J. C 72, 1896 (2012).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR42","doi-asserted-by":"publisher","first-page":"072002","DOI":"10.1103\/PhysRevD.96.072002","volume":"96","author":"ATLAS Collaboration.","year":"2017","unstructured":"ATLAS Collaboration. Jet energy scale measurements and their systematic uncertainties in proton\u2013proton collisions at $$\\sqrt{s}=13$$ TeV with the ATLAS detector. Phys. Rev. D 96, 072002 (2017).","journal-title":"Phys. Rev. D"},{"key":"1757_CR43","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-016-4395-z","volume":"76","author":"ATLAS Collaboration.","year":"2016","unstructured":"ATLAS Collaboration. Performance of pile-up mitigation techniques for jets in pp collisions at $$\\sqrt{s}=8\\,{{{\\rm{TeV}}}}$$ using the ATLAS detector. Eur. Phys. J. C 76, 581 (2016).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR44","doi-asserted-by":"publisher","first-page":"970","DOI":"10.1140\/epjc\/s10052-019-7450-8","volume":"79","author":"ATLAS Collaboration.","year":"2019","unstructured":"ATLAS Collaboration. ATLAS b-jet identification performance and efficiency measurement with $$t\\overline{t}$$ events in pp collisions at $$\\sqrt{s}=13$$ TeV. Eur. 
Phys. J. C 79, 970 (2019).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR45","doi-asserted-by":"publisher","first-page":"032003","DOI":"10.1103\/PhysRevD.94.032003","volume":"94","author":"ATLAS Collaboration.","year":"2016","unstructured":"ATLAS Collaboration. Search for pair production of gluinos decaying via stop and sbottom in events with b-jets and large missing transverse momentum in pp collisions at $$\\sqrt{s}=13$$ TeV with the ATLAS detector. Phys. Rev. D 94, 032003 (2016).","journal-title":"Phys. Rev. D"},{"key":"1757_CR46","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-018-6288-9","volume":"78","author":"ATLAS Collaboration.","year":"2018","unstructured":"ATLAS Collaboration. Performance of missing transverse momentum reconstruction with the ATLAS detector using proton\u2013proton collisions at $$\\sqrt{s}$$ = 13 TeV. Eur. Phys. J. C 78, 903 (2018).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR47","unstructured":"ATLAS Collaboration. Object-based missing transverse momentum significance in the ATLAS detector. ATLAS-CONF-2018-038. CERN https:\/\/cds.cern.ch\/record\/2630948 (2018)."},{"key":"1757_CR48","doi-asserted-by":"publisher","first-page":"135341","DOI":"10.1016\/j.physletb.2020.135341","volume":"803","author":"ATLAS Collaboration.","year":"2020","unstructured":"ATLAS Collaboration. Evidence for electroweak production of two jets in association with a em>Z\u03b3 pair in pp collisions at $$\\sqrt{s}=13\\,{{{\\rm{TeV}}}}$$ with the ATLAS detector. Phys. Lett. B 803, 135341 (2020).","journal-title":"Phys. Lett. B"},{"key":"1757_CR49","first-page":"031","volume":"04","author":"ATLAS Collaboration.","year":"2014","unstructured":"ATLAS Collaboration. Measurement of the electroweak production of dijets in association with a Z-boson and distributions sensitive to vector boson fusion in proton\u2013proton collisions at $$\\sqrt{s}=8\\,{{{\\rm{TeV}}}}$$ using the ATLAS detector. J. High Energy Phys. 04, 031 (2014).","journal-title":"J. 
High Energy Phys."},{"key":"1757_CR50","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-017-5007-2","volume":"77","author":"ATLAS Collaboration.","year":"2017","unstructured":"ATLAS Collaboration. Measurements of electroweak Wjj production and constraints on anomalous gauge couplings with the ATLAS detector. Eur. Phys. J. C 77, 474 (2017).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR51","doi-asserted-by":"publisher","first-page":"206","DOI":"10.1016\/j.physletb.2017.10.040","volume":"775","author":"ATLAS Collaboration.","year":"2017","unstructured":"ATLAS Collaboration. Measurement of the cross-section for electroweak production of dijets in association with a Z boson in pp collisions at $$\\sqrt{s}=13\\,{{{\\rm{TeV}}}}$$ with the ATLAS detector. Phys. Lett. B 775, 206\u2013228 (2017).","journal-title":"Phys. Lett. B"},{"key":"1757_CR52","doi-asserted-by":"publisher","first-page":"032005","DOI":"10.1103\/PhysRevD.97.032005","volume":"97","author":"ATLAS Collaboration.","year":"2018","unstructured":"ATLAS Collaboration. $$ZZ\\to {\\ell }^{+}{\\ell }^{-}{\\ell }^{{\\,}^{\\prime} +}{\\ell }^{{\\,}^{\\prime} -}$$ cross-section measurements and search for anomalous triple gauge couplings in 13\u2009TeV pp collisions with the ATLAS detector. Phys. Rev. D 97, 032005 (2018).","journal-title":"Phys. Rev. D"},{"key":"1757_CR53","doi-asserted-by":"publisher","first-page":"318","DOI":"10.1016\/j.physletb.2017.11.049","volume":"776","author":"ATLAS Collaboration.","year":"2018","unstructured":"ATLAS Collaboration. Search for an invisibly decaying Higgs boson or dark matter candidates produced in association with a Z boson in pp collisions at $$\\sqrt{s}=$$ 13\u2009TeV with the ATLAS detector. Phys. Lett. B 776, 318\u2013337 (2018).","journal-title":"Phys. Lett. B"},{"key":"1757_CR54","unstructured":"ATLAS Collaboration. Luminosity determination in pp collisions at $$\\sqrt{s}=13$$ TeV using the ATLAS detector at the LHC, ATLAS-CONF-2019-021. 
CERN http:\/\/cds.cern.ch\/record\/2677054 (2019)."},{"key":"1757_CR55","doi-asserted-by":"publisher","first-page":"P07017","DOI":"10.1088\/1748-0221\/13\/07\/P07017","volume":"13","author":"G Avoni","year":"2018","unstructured":"Avoni, G. et al. The new lucid-2 detector for luminosity measurement and monitoring in atlas. J. Instrum. 13, P07017 (2018).","journal-title":"J. Instrum."},{"key":"1757_CR56","doi-asserted-by":"publisher","first-page":"023001","DOI":"10.1088\/0954-3899\/43\/2\/023001","volume":"43","author":"J Butterworth","year":"2016","unstructured":"Butterworth, J. et al. PDF4LHC recommendations for LHC Run II. J. Phys. G 43, 023001 (2016).","journal-title":"J. Phys. G"},{"key":"1757_CR57","doi-asserted-by":"publisher","first-page":"127","DOI":"10.1007\/JHEP10(2019)127","volume":"10","author":"ATLAS Collaboration.","year":"2019","unstructured":"ATLAS Collaboration. Measurement of ZZ production in the \u2113\u2113\u03bd\u03bd final state with the ATLAS detector in pp collisions at $$\\sqrt{s}=13$$ TeV. J. High Energy Phys. 10, 127 (2019).","journal-title":"J. High Energy Phys."},{"key":"1757_CR58","doi-asserted-by":"publisher","DOI":"10.1140\/epjc\/s10052-016-4018-8","volume":"76","author":"J Bellm","year":"2016","unstructured":"Bellm, J. et al. Herwig 7.0\/Herwig++ 3.0 release note. Eur. Phys. J. C 76, 196 (2016).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR59","doi-asserted-by":"publisher","first-page":"639","DOI":"10.1140\/epjc\/s10052-008-0798-9","volume":"58","author":"M B\u00e4hr","year":"2008","unstructured":"B\u00e4hr, M. et al. Herwig++ physics and manual. Eur. Phys. J. C 58, 639\u2013707 (2008).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR60","doi-asserted-by":"publisher","first-page":"1189","DOI":"10.1214\/aos\/1013203451","volume":"29","author":"JH Friedman","year":"2001","unstructured":"Friedman, J. H. Greedy function approximation: A gradient boosting machine. Ann. Stat. 29, 1189\u20131232 (2001).","journal-title":"Ann. 
Stat."},{"key":"1757_CR61","unstructured":"H\u00f6cker, A. et al. TMVA \u2013 toolkit for multivariate data analysis. Preprint at https:\/\/arxiv.org\/abs\/physics\/0703039 (2007)."},{"key":"1757_CR62","doi-asserted-by":"publisher","first-page":"1554","DOI":"10.1140\/epjc\/s10052-011-1554-0","volume":"71","author":"G Cowan","year":"2011","unstructured":"Cowan, G., Cranmer, K., Gross, E. & Vitells, O. Asymptotic formulae for likelihood-based tests of new physics. Eur. Phys. J. C 71, 1554 (2011).","journal-title":"Eur. Phys. J. C"},{"key":"1757_CR63","doi-asserted-by":"publisher","first-page":"682","DOI":"10.1016\/j.physletb.2017.10.020","volume":"774","author":"CMS Collaboration.","year":"2017","unstructured":"CMS Collaboration. Measurement of vector boson scattering and constraints on anomalous quartic couplings from events with four leptons and two jets in proton\u2013proton collisions at $$\\sqrt{s}=$$ 13 TeV. Phys. Lett. B 774, 682\u2013705 (2017).","journal-title":"Phys. Lett. B"},{"key":"1757_CR64","unstructured":"ATLAS Collaboration. ATLAS computing acknowledgements, ATL-SOFT-PUB-2021-003. CERN (2021) https:\/\/cds.cern.ch\/record\/2776662."},{"key":"1757_CR65","doi-asserted-by":"publisher","unstructured":"ATLAS Collaboration. Observation of electroweak production of two jets and a Z-boson pair with the ATLAS detector at the LHC (version 3). 
HEPData https:\/\/doi.org\/10.17182\/hepdata.93015.v3 (2022).","DOI":"10.17182\/hepdata.93015.v3"}],"container-title":["Nature Physics"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/www.nature.com\/articles\/s41567-022-01757-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41567-022-01757-y","content-type":"text\/html","content-version":"vor","intended-application":"text-mining"},{"URL":"https:\/\/www.nature.com\/articles\/s41567-022-01757-y.pdf","content-type":"application\/pdf","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,10]],"date-time":"2023-02-10T15:32:36Z","timestamp":1676043156000},"score":1,"resource":{"primary":{"URL":"https:\/\/www.nature.com\/articles\/s41567-022-01757-y"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2023,2,9]]},"references-count":65,"alternative-id":["1757"],"URL":"http:\/\/dx.doi.org\/10.1038\/s41567-022-01757-y","relation":{},"ISSN":["1745-2473","1745-2481"],"issn-type":[{"value":"1745-2473","type":"print"},{"value":"1745-2481","type":"electronic"}],"subject":["General Physics and Astronomy"],"published":{"date-parts":[[2023,2,9]]},"assertion":[{"value":"27 April 2020","order":1,"name":"received","label":"Received","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"15 August 2022","order":2,"name":"accepted","label":"Accepted","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"9 February 2023","order":3,"name":"first_online","label":"First Online","group":{"name":"ArticleHistory","label":"Article History"}},{"value":"The authors declare no competing interests.","order":1,"name":"Ethics","group":{"name":"EthicsHeading","label":"Competing interests"}}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala 
b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala index fbf6f72c0..c27cebf65 100644 --- a/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala +++ b/dhp-workflows/dhp-doiboost/src/test/scala/eu/dnetlib/dhp/doiboost/crossref/CrossrefMappingTest.scala @@ -22,6 +22,13 @@ class CrossrefMappingTest { val logger: Logger = LoggerFactory.getLogger(Crossref2Oaf.getClass) val mapper = new ObjectMapper() + @Test + def testMissingAuthorParser():Unit = { + val json: String = Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/doiboost/crossref/s41567-022-01757-y.json")).mkString + val result = Crossref2Oaf.convert(json) + result.filter(o => o.isInstanceOf[Publication]).map(p=> p.asInstanceOf[Publication]).foreach(p =>assertTrue(p.getAuthor.size()>0)) + } + @Test def testFunderRelationshipsMapping(): Unit = { val template = Source diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java new file mode 100644 index 000000000..c71ccb439 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/MoveResult.java @@ -0,0 +1,84 @@ + +package eu.dnetlib.dhp; + +import static eu.dnetlib.dhp.PropagationConstant.isSparkSessionManaged; +import static eu.dnetlib.dhp.PropagationConstant.readPath; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.resulttocommunityfromorganization.SparkResultToCommunityFromOrganizationJob; +import 
eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Result; + +/** + * @author miriam.baglioni + * @Date 15/01/24 + */ +public class MoveResult implements Serializable { + private static final Logger log = LoggerFactory.getLogger(MoveResult.class); + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + MoveResult.class + .getResourceAsStream( + "/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + moveResults(spark, inputPath, outputPath); + + }); + } + + public static void moveResults(SparkSession spark, String inputPath, String outputPath) { + + ModelSupport.entityTypes + .keySet() + .parallelStream() + .filter(e -> ModelSupport.isResult(e)) + // .parallelStream() + .forEach(e -> { + Class resultClazz = ModelSupport.entityTypes.get(e); + Dataset resultDataset = readPath(spark, inputPath + e.name(), resultClazz); + if (resultDataset.count() > 0) { + + resultDataset + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + e.name()); + } + + }); + + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java index d121b8b7e..06d0f95c2 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java +++ 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/api/Utils.java @@ -167,4 +167,11 @@ public class Utils implements Serializable { }); return projectMap; } + + public static List getCommunityIdList(String baseURL) throws IOException { + return getValidCommunities(baseURL) + .stream() + .map(community -> community.getId()) + .collect(Collectors.toList()); + } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 5d1b2b38d..e20fcb081 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -45,7 +45,7 @@ public class SparkBulkTagJob { .toString( SparkBulkTagJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -105,7 +105,6 @@ public class SparkBulkTagJob { Map>> dsm = cc.getEoscDatasourceMap(); for (String ds : datasources.collectAsList()) { - // final String dsId = ds.substring(3); if (!dsm.containsKey(ds)) { ArrayList> eoscList = new ArrayList<>(); dsm.put(ds, eoscList); @@ -116,13 +115,11 @@ public class SparkBulkTagJob { private static boolean isOKDatasource(Datasource ds) { final String compatibility = ds.getOpenairecompatibility().getClassid(); - boolean isOk = (compatibility.equalsIgnoreCase(OPENAIRE_3) || + return (compatibility.equalsIgnoreCase(OPENAIRE_3) || compatibility.equalsIgnoreCase(OPENAIRE_4) || compatibility.equalsIgnoreCase(OPENAIRE_CRIS) || compatibility.equalsIgnoreCase(OPENAIRE_DATA)) && ds.getCollectedfrom().stream().anyMatch(cf -> cf.getKey().equals(EOSC)); - - return isOk; } private static void execBulkTag( @@ -151,7 
+148,13 @@ public class SparkBulkTagJob { .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath + e.name()); + .json(outputPath + e.name());// writing the tagging in the working dir for entity + + readPath(spark, outputPath + e.name(), resultClazz) // copy the tagging in the actual result output path + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(inputPath + e.name()); }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java index b9f3bff52..a016509e5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareDatasourceCountryAssociation.java @@ -45,7 +45,7 @@ public class PrepareDatasourceCountryAssociation { .toString( PrepareDatasourceCountryAssociation.class .getResourceAsStream( - "/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareassoc_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -66,7 +66,7 @@ public class PrepareDatasourceCountryAssociation { conf, isSparkSessionManaged, spark -> { - removeOutputDir(spark, outputPath); + // removeOutputDir(spark, outputPath); prepareDatasourceCountryAssociation( spark, Arrays.asList(parser.get("whitelist").split(";")), @@ -90,7 +90,8 @@ public class PrepareDatasourceCountryAssociation { (FilterFunction) ds -> !ds.getDataInfo().getDeletedbyinference() && Optional.ofNullable(ds.getDatasourcetype()).isPresent() && Optional.ofNullable(ds.getDatasourcetype().getClassid()).isPresent() && - (allowedtypes.contains(ds.getDatasourcetype().getClassid()) || + 
((Optional.ofNullable(ds.getJurisdiction()).isPresent() && + allowedtypes.contains(ds.getJurisdiction().getClassid())) || whitelist.contains(ds.getId()))); // filtering of the relations taking the non deleted by inference and those with IsProvidedBy as relclass diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java index 184d24751..884aa0e47 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/PrepareResultCountrySet.java @@ -32,7 +32,7 @@ public class PrepareResultCountrySet { .toString( PrepareResultCountrySet.class .getResourceAsStream( - "/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareresultcountry_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java index d9f6433a0..a0cc4c84a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/countrypropagation/SparkCountryPropagationJob.java @@ -35,7 +35,7 @@ public class SparkCountryPropagationJob { .toString( SparkCountryPropagationJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_countrypropagation_parameters.json")); final ArgumentApplicationParser parser = new 
ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java index 8d3432f06..bdfdde13b 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/PrepareInfo.java @@ -60,7 +60,7 @@ public class PrepareInfo implements Serializable { .toString( SparkResultToOrganizationFromIstRepoJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json")); + "/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_preparation_parameter.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkEntityToOrganizationFromSemRel.java similarity index 97% rename from dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java rename to dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkEntityToOrganizationFromSemRel.java index 27e502aba..4e30a6d6a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkResultToOrganizationFromSemRel.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkEntityToOrganizationFromSemRel.java @@ -27,8 +27,8 @@ import eu.dnetlib.dhp.resulttoorganizationfrominstrepo.SparkResultToOrganization import eu.dnetlib.dhp.schema.common.ModelConstants; 
import eu.dnetlib.dhp.schema.oaf.Relation; -public class SparkResultToOrganizationFromSemRel implements Serializable { - private static final Logger log = LoggerFactory.getLogger(SparkResultToOrganizationFromSemRel.class); +public class SparkEntityToOrganizationFromSemRel implements Serializable { + private static final Logger log = LoggerFactory.getLogger(SparkEntityToOrganizationFromSemRel.class); private static final int MAX_ITERATION = 5; public static final String NEW_RESULT_RELATION_PATH = "/newResultRelation"; public static final String NEW_PROJECT_RELATION_PATH = "/newProjectRelation"; @@ -39,7 +39,7 @@ public class SparkResultToOrganizationFromSemRel implements Serializable { .toString( SparkResultToOrganizationFromIstRepoJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json")); + "/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_propagation_parameter.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java index 386ea1a5c..36a7523c5 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/StepActions.java @@ -3,8 +3,8 @@ package eu.dnetlib.dhp.entitytoorganizationfromsemrel; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.PropagationConstant.readPath; -import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel.NEW_PROJECT_RELATION_PATH; -import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel.NEW_RESULT_RELATION_PATH; +import static 
eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkEntityToOrganizationFromSemRel.NEW_PROJECT_RELATION_PATH; +import static eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkEntityToOrganizationFromSemRel.NEW_RESULT_RELATION_PATH; import java.io.Serializable; import java.util.*; @@ -20,7 +20,6 @@ import org.jetbrains.annotations.NotNull; import com.fasterxml.jackson.databind.ObjectMapper; import eu.dnetlib.dhp.KeyValueSet; -import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.oaf.Relation; import scala.Tuple2; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java index 95b870292..bc72a2ae1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep1.java @@ -31,7 +31,7 @@ public class PrepareResultOrcidAssociationStep1 { .toString( PrepareResultOrcidAssociationStep1.class .getResourceAsStream( - "/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConf); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java index c60012a74..46894d0e1 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java +++ 
b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/PrepareResultOrcidAssociationStep2.java @@ -29,7 +29,7 @@ public class PrepareResultOrcidAssociationStep2 { .toString( PrepareResultOrcidAssociationStep2.class .getResourceAsStream( - "/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json")); + "/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java index a38b4da2e..c5d632658 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/orcidtoresultfromsemrel/SparkOrcidToResultFromSemRelJob.java @@ -2,7 +2,7 @@ package eu.dnetlib.dhp.orcidtoresultfromsemrel; import static eu.dnetlib.dhp.PropagationConstant.*; -import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.util.List; import java.util.Optional; @@ -36,7 +36,7 @@ public class SparkOrcidToResultFromSemRelJob { .toString( SparkOrcidToResultFromSemRelJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); @@ -65,9 +65,8 @@ public class SparkOrcidToResultFromSemRelJob { Class resultClazz = (Class) Class.forName(resultClassName); SparkConf conf = 
new SparkConf(); - conf.set("hive.metastore.uris", parser.get("hive_metastore_uris")); - runWithSparkHiveSession( + runWithSparkSession( conf, isSparkSessionManaged, spark -> { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java index ac61e26f9..8f4e2ad9a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/PrepareProjectResultsAssociation.java @@ -28,7 +28,7 @@ public class PrepareProjectResultsAssociation { .toString( PrepareProjectResultsAssociation.class .getResourceAsStream( - "/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_prepareprojecttoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java index 1ec521af1..a6466716a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/projecttoresult/SparkResultToProjectThroughSemRelJob.java @@ -33,7 +33,7 @@ public class SparkResultToProjectThroughSemRelJob { .toString( SparkResultToProjectThroughSemRelJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_projecttoresult_parameters.json")); final ArgumentApplicationParser parser = new 
ArgumentApplicationParser(jsonConfiguration); @@ -64,7 +64,7 @@ public class SparkResultToProjectThroughSemRelJob { removeOutputDir(spark, outputPath); } execPropagation( - spark, outputPath, alreadyLinkedPath, potentialUpdatePath, saveGraph); + spark, outputPath, alreadyLinkedPath, potentialUpdatePath); }); } @@ -72,24 +72,23 @@ public class SparkResultToProjectThroughSemRelJob { SparkSession spark, String outputPath, String alreadyLinkedPath, - String potentialUpdatePath, - Boolean saveGraph) { + String potentialUpdatePath) { Dataset toaddrelations = readPath(spark, potentialUpdatePath, ResultProjectSet.class); Dataset alreadyLinked = readPath(spark, alreadyLinkedPath, ResultProjectSet.class); - if (saveGraph) { - toaddrelations - .joinWith( - alreadyLinked, - toaddrelations.col("resultId").equalTo(alreadyLinked.col("resultId")), - "left_outer") - .flatMap(mapRelationRn(), Encoders.bean(Relation.class)) - .write() - .mode(SaveMode.Append) - .option("compression", "gzip") - .json(outputPath); - } + // if (saveGraph) { + toaddrelations + .joinWith( + alreadyLinked, + toaddrelations.col("resultId").equalTo(alreadyLinked.col("resultId")), + "left_outer") + .flatMap(mapRelationRn(), Encoders.bean(Relation.class)) + .write() + .mode(SaveMode.Append) + .option("compression", "gzip") + .json(outputPath); + // } } private static FlatMapFunction, Relation> mapRelationRn() { diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java index 54fa60168..be31cd46c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/PrepareResultCommunitySet.java @@ -34,7 +34,7 @@ public class PrepareResultCommunitySet 
{ .toString( PrepareResultCommunitySet.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java index df8ca3805..4f755266a 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromorganization/SparkResultToCommunityFromOrganizationJob.java @@ -36,7 +36,7 @@ public class SparkResultToCommunityFromOrganizationJob { .toString( SparkResultToCommunityFromOrganizationJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_communitytoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -76,23 +76,41 @@ public class SparkResultToCommunityFromOrganizationJob { ModelSupport.entityTypes .keySet() .parallelStream() + .filter(e -> ModelSupport.isResult(e)) + // .parallelStream() .forEach(e -> { - if (ModelSupport.isResult(e)) { - Class resultClazz = ModelSupport.entityTypes.get(e); - removeOutputDir(spark, outputPath + e.name()); - Dataset result = readPath(spark, inputPath + e.name(), resultClazz); + // if () { + Class resultClazz = ModelSupport.entityTypes.get(e); + removeOutputDir(spark, 
outputPath + e.name()); + Dataset result = readPath(spark, inputPath + e.name(), resultClazz); - result - .joinWith( - possibleUpdates, - result.col("id").equalTo(possibleUpdates.col("resultId")), - "left_outer") - .map(resultCommunityFn(), Encoders.bean(resultClazz)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + e.name()); - } + log.info("executing left join"); + result + .joinWith( + possibleUpdates, + result.col("id").equalTo(possibleUpdates.col("resultId")), + "left_outer") + .map(resultCommunityFn(), Encoders.bean(resultClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + e.name()); + +// log +// .info( +// "reading results from " + outputPath + e.name() + " and copying them to " + inputPath +// + e.name()); +// Dataset tmp = readPath(spark, outputPath + e.name(), resultClazz); +// if (tmp.count() > 0){ +// +// tmp +// .write() +// .mode(SaveMode.Overwrite) +// .option("compression", "gzip") +// .json(inputPath + e.name()); +// } + + // } }); } @@ -109,11 +127,11 @@ public class SparkResultToCommunityFromOrganizationJob { .map(Context::getId) .collect(Collectors.toList()); - @SuppressWarnings("unchecked") - R res = (R) ret.getClass().newInstance(); + // @SuppressWarnings("unchecked") + // R res = (R) ret.getClass().newInstance(); - res.setId(ret.getId()); - List propagatedContexts = new ArrayList<>(); + // res.setId(ret.getId()); + // List propagatedContexts = new ArrayList<>(); for (String cId : communitySet) { if (!contextList.contains(cId)) { Context newContext = new Context(); @@ -127,11 +145,11 @@ public class SparkResultToCommunityFromOrganizationJob { PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_ORGANIZATION_CLASS_NAME, ModelConstants.DNET_PROVENANCE_ACTIONS))); - propagatedContexts.add(newContext); + ret.getContext().add(newContext); } } - res.setContext(propagatedContexts); - ret.mergeFrom(res); + // 
res.setContext(propagatedContexts); + // ret.mergeFrom(res); } return ret; }; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java index 467e11a96..512dfa9be 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/PrepareResultCommunitySet.java @@ -38,7 +38,7 @@ public class PrepareResultCommunitySet { .toString( PrepareResultCommunitySet.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); parser.parseArgument(args); diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java index 6e298cf94..f9c36d7ca 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromproject/SparkResultToCommunityFromProject.java @@ -44,7 +44,7 @@ public class SparkResultToCommunityFromProject implements Serializable { .toString( SparkResultToCommunityFromProject.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_communitytoresult_parameters.json")); 
final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -86,23 +86,30 @@ public class SparkResultToCommunityFromProject implements Serializable { ModelSupport.entityTypes .keySet() .parallelStream() + .filter(e -> ModelSupport.isResult(e)) .forEach(e -> { - if (ModelSupport.isResult(e)) { - removeOutputDir(spark, outputPath + e.name()); - Class resultClazz = ModelSupport.entityTypes.get(e); - Dataset result = readPath(spark, inputPath + e.name(), resultClazz); + // if () { + removeOutputDir(spark, outputPath + e.name()); + Class resultClazz = ModelSupport.entityTypes.get(e); + Dataset result = readPath(spark, inputPath + e.name(), resultClazz); - result - .joinWith( - possibleUpdates, - result.col("id").equalTo(possibleUpdates.col("resultId")), - "left_outer") - .map(resultCommunityFn(), Encoders.bean(resultClazz)) - .write() - .mode(SaveMode.Overwrite) - .option("compression", "gzip") - .json(outputPath + e.name()); - } + result + .joinWith( + possibleUpdates, + result.col("id").equalTo(possibleUpdates.col("resultId")), + "left_outer") + .map(resultCommunityFn(), Encoders.bean(resultClazz)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + e.name()); + +// readPath(spark, outputPath + e.name(), resultClazz) +// .write() +// .mode(SaveMode.Overwrite) +// .option("compression", "gzip") +// .json(inputPath + e.name()); + // } }); } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java index 0c836a3ba..aede9ef05 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep1.java @@ -4,6 +4,7 @@ 
package eu.dnetlib.dhp.resulttocommunityfromsemrel; import static eu.dnetlib.dhp.PropagationConstant.*; import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkHiveSession; +import java.io.IOException; import java.util.Arrays; import java.util.List; @@ -15,6 +16,7 @@ import org.slf4j.LoggerFactory; import com.google.gson.Gson; +import eu.dnetlib.dhp.api.Utils; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.resulttocommunityfromorganization.ResultCommunityList; import eu.dnetlib.dhp.schema.oaf.Relation; @@ -26,11 +28,6 @@ import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; public class PrepareResultCommunitySetStep1 { private static final Logger log = LoggerFactory.getLogger(PrepareResultCommunitySetStep1.class); - private static final String COMMUNITY_LIST_XQUERY = "for $x in collection('/db/DRIVER/ContextDSResources/ContextDSResourceType')" - + " where $x//CONFIGURATION/context[./@type='community' or ./@type='ri']" - + " and $x//CONFIGURATION/context/param[./@name='status']/text() != 'hidden'" - + " return $x//CONFIGURATION/context/@id/string()"; - /** * associates to each result the set of community contexts they are associated to; associates to each target of a * relation with allowed semantics the set of community context it could possibly inherit from the source of the @@ -64,7 +61,7 @@ public class PrepareResultCommunitySetStep1 { .toString( PrepareResultCommunitySetStep1.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -88,10 +85,10 @@ public class PrepareResultCommunitySetStep1 { final List allowedsemrel = Arrays.asList(parser.get("allowedsemrels").split(";")); log.info("allowedSemRel: {}", new Gson().toJson(allowedsemrel)); 
- final String isLookupUrl = parser.get("isLookUpUrl"); - log.info("isLookupUrl: {}", isLookupUrl); + final String baseURL = parser.get("baseURL"); + log.info("baseURL: {}", baseURL); - final List communityIdList = getCommunityList(isLookupUrl); + final List communityIdList = getCommunityList(baseURL); log.info("communityIdList: {}", new Gson().toJson(communityIdList)); final String resultType = resultClassName.substring(resultClassName.lastIndexOf(".") + 1).toLowerCase(); @@ -159,9 +156,8 @@ public class PrepareResultCommunitySetStep1 { .json(outputResultPath); } - public static List getCommunityList(final String isLookupUrl) throws ISLookUpException { - ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); - return isLookUp.quickSearchProfile(COMMUNITY_LIST_XQUERY); + public static List getCommunityList(final String baseURL) throws IOException { + return Utils.getCommunityIdList(baseURL); } } diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java index 0ddb19a1a..a53d3dfe3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/PrepareResultCommunitySetStep2.java @@ -31,7 +31,7 @@ public class PrepareResultCommunitySetStep2 { .toString( PrepareResultCommunitySetStep2.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); diff --git 
a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java index f31a26230..3cf2f73c3 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttocommunityfromsemrel/SparkResultToCommunityThroughSemRelJob.java @@ -33,7 +33,7 @@ public class SparkResultToCommunityThroughSemRelJob { .toString( SparkResultToCommunityThroughSemRelJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_communitytoresult_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -100,6 +100,7 @@ public class SparkResultToCommunityThroughSemRelJob { .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); + } private static MapFunction, R> contextUpdaterFn() { @@ -109,11 +110,11 @@ public class SparkResultToCommunityThroughSemRelJob { if (rcl.isPresent()) { Set contexts = new HashSet<>(); ret.getContext().forEach(c -> contexts.add(c.getId())); - List contextList = rcl + rcl .get() .getCommunityList() .stream() - .map( + .forEach( c -> { if (!contexts.contains(c)) { Context newContext = new Context(); @@ -127,19 +128,11 @@ public class SparkResultToCommunityThroughSemRelJob { PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_ID, PROPAGATION_RESULT_COMMUNITY_SEMREL_CLASS_NAME, ModelConstants.DNET_PROVENANCE_ACTIONS))); - return newContext; + ret.getContext().add(newContext); } - return null; - }) - .filter(Objects::nonNull) - .collect(Collectors.toList()); - @SuppressWarnings("unchecked") - R r = (R) ret.getClass().newInstance(); + }); - 
r.setId(ret.getId()); - r.setContext(contextList); - ret.mergeFrom(r); } return ret; diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java new file mode 100644 index 000000000..11e942142 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/AppendNewRelations.java @@ -0,0 +1,68 @@ + +package eu.dnetlib.dhp.resulttoorganizationfrominstrepo; + +import static eu.dnetlib.dhp.PropagationConstant.*; +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.Serializable; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.sql.SaveMode; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.schema.oaf.Relation; + +/** + * @author miriam.baglioni + * @Date 09/12/23 + */ +public class AppendNewRelations implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(AppendNewRelations.class); + + public static void main(String[] args) throws Exception { + + String jsonConfiguration = IOUtils + .toString( + AppendNewRelations.class + .getResourceAsStream( + "/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_newrelation_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = isSparkSessionManaged(parser); + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", 
outputPath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> appendNewRelation(spark, inputPath, outputPath)); + } + + private static void appendNewRelation(SparkSession spark, String inputPath, String outputPath) { + + readPath(spark, inputPath + "publication/relation", Relation.class) + .union(readPath(spark, inputPath + "dataset/relation", Relation.class)) + .union(readPath(spark, inputPath + "otherresearchproduct/relation", Relation.class)) + .union(readPath(spark, inputPath + "software/relation", Relation.class)) + .write() + .mode(SaveMode.Append) + .option("compression", "gzip") + .json(outputPath); + } + +} diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java index 1663afb32..57488bd20 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/PrepareResultInstRepoAssociation.java @@ -40,7 +40,7 @@ public class PrepareResultInstRepoAssociation { .toString( PrepareResultInstRepoAssociation.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -52,10 +52,13 @@ public class PrepareResultInstRepoAssociation { String inputPath = parser.get("sourcePath"); log.info("inputPath: {}", inputPath); - final String datasourceOrganizationPath = parser.get("datasourceOrganizationPath"); + final String workingPath = parser.get("workingPath"); + 
log.info("workingPath : {}", workingPath); + + final String datasourceOrganizationPath = workingPath + "/preparedInfo/datasourceOrganization"; log.info("datasourceOrganizationPath {}: ", datasourceOrganizationPath); - final String alreadyLinkedPath = parser.get("alreadyLinkedPath"); + final String alreadyLinkedPath = workingPath + "/preparedInfo/alreadyLinked"; log.info("alreadyLinkedPath {}: ", alreadyLinkedPath); List blacklist = Optional diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java index 0757ebccd..c8862b10c 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/SparkResultToOrganizationFromIstRepoJob.java @@ -47,7 +47,7 @@ public class SparkResultToOrganizationFromIstRepoJob { .toString( SparkResultToOrganizationFromIstRepoJob.class .getResourceAsStream( - "/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json")); + "/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json")); final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); @@ -119,7 +119,7 @@ public class SparkResultToOrganizationFromIstRepoJob { "left_outer") .flatMap(createRelationFn(), Encoders.bean(Relation.class)) .write() - .mode(SaveMode.Append) + .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath); } diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/datasourcemaster_parameters.json 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/datasourcemaster_parameters.json deleted file mode 100644 index 9a2eadaa7..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/datasourcemaster_parameters.json +++ /dev/null @@ -1,32 +0,0 @@ -[ - { - "paramName": "p", - "paramLongName": "hdfsPath", - "paramDescription": "the path where storing the sequential file", - "paramRequired": true - }, - { - "paramName": "nn", - "paramLongName": "hdfsNameNode", - "paramDescription": "the name node on hdfs", - "paramRequired": true - }, - { - "paramName": "pgurl", - "paramLongName": "postgresUrl", - "paramDescription": "postgres url, example: jdbc:postgresql://localhost:5432/testdb", - "paramRequired": true - }, - { - "paramName": "pguser", - "paramLongName": "postgresUser", - "paramDescription": "postgres user", - "paramRequired": false - }, - { - "paramName": "pgpasswd", - "paramLongName": "postgresPassword", - "paramDescription": "postgres password", - "paramRequired": false - } -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eosc_bulkTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eosc_bulkTag_parameters.json deleted file mode 100644 index 5aace346d..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eosc_bulkTag_parameters.json +++ /dev/null @@ -1,41 +0,0 @@ -[ - - { - "paramName":"s", - "paramLongName":"sourcePath", - "paramDescription": "the path of the sequencial file to read", - "paramRequired": true - }, - { - "paramName": "dmp", - "paramLongName":"datasourceMapPath", - "paramDescription": "the path where the association datasource master has been stored", - "paramRequired": true - }, - { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - "paramRequired": 
true - }, - { - "paramName": "wp", - "paramLongName": "workingPath", - "paramDescription": "the path used to store temporary output files", - "paramRequired": true - }, - { - "paramName": "ssm", - "paramLongName": "isSparkSessionManaged", - "paramDescription": "true if the spark session is managed, false otherwise", - "paramRequired": false - }, - { - - "paramName": "rt", - "paramLongName": "resultType", - "paramDescription": "the result type", - "paramRequired": true - } - -] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml deleted file mode 100644 index fe82ae194..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/config-default.xml +++ /dev/null @@ -1,54 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml deleted file mode 100644 index 16c8c4e19..000000000 --- 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/workflow.xml +++ /dev/null @@ -1,197 +0,0 @@ - - - - sourcePath - the source path - - - outputPath - sets the outputPath - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - ${wf:conf('resumeFrom') eq 'PrepareInfo'} - - - - - - - - - - - - - - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/publication - ${nameNode}/${outputPath}/publication - - - - - - - - ${nameNode}/${sourcePath}/dataset - ${nameNode}/${outputPath}/dataset - - - - - - - - ${nameNode}/${sourcePath}/otherresearchproduct - ${nameNode}/${outputPath}/otherresearchproduct - - - - - - - - ${nameNode}/${sourcePath}/software - ${nameNode}/${outputPath}/software - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - - - - yarn - cluster - PrepareResultOrganizationAssociation - eu.dnetlib.dhp.entitytoorganizationfromsemrel.PrepareInfo - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --graphPath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --leavesPath${workingDir}/preparedInfo/leavesPath - 
--childParentPath${workingDir}/preparedInfo/childParentPath - --resultOrgPath${workingDir}/preparedInfo/resultOrgPath - --projectOrganizationPath${workingDir}/preparedInfo/projectOrganizationPath - --relationPath${workingDir}/preparedInfo/relation - - - - - - - - yarn - cluster - resultToOrganizationFromSemRel - eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkResultToOrganizationFromSemRel - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --conf spark.sql.shuffle.partitions=3840 - - --relationPath${workingDir}/preparedInfo/relation - --outputPath${outputPath}/relation - --leavesPath${workingDir}/preparedInfo/leavesPath - --childParentPath${workingDir}/preparedInfo/childParentPath - --resultOrgPath${workingDir}/preparedInfo/resultOrgPath - --projectOrganizationPath${workingDir}/preparedInfo/projectOrganizationPath - --hive_metastore_uris${hive_metastore_uris} - --workingDir${workingDir}/working - --iterations${iterations} - - - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml deleted file mode 100644 index 9e91c06fb..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/workflow.xml +++ /dev/null @@ -1,184 +0,0 @@ - - - - sourcePath - the source path - - - allowedsemrels - the allowed semantics - - - outputPath - 
the output path - - - - - ${jobTracker} - ${nameNode} - - - oozie.action.sharelib.for.spark - ${oozieActionShareLibForSpark2} - - - - - - - - Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - - - - - - - - - - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/publication - ${nameNode}/${outputPath}/publication - - - - - - - - ${nameNode}/${sourcePath}/dataset - ${nameNode}/${outputPath}/dataset - - - - - - - - ${nameNode}/${sourcePath}/otherresearchproduct - ${nameNode}/${outputPath}/otherresearchproduct - - - - - - - - ${nameNode}/${sourcePath}/software - ${nameNode}/${outputPath}/software - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - - - - yarn - cluster - PrepareProjectResultsAssociation - eu.dnetlib.dhp.projecttoresult.PrepareProjectResultsAssociation - dhp-enrichment-${projectVersion}.jar - - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - - --sourcePath${sourcePath}/relation - --allowedsemrels${allowedsemrels} - --hive_metastore_uris${hive_metastore_uris} - --potentialUpdatePath${workingDir}/preparedInfo/potentialUpdates - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - - - - - - - - yarn - cluster - ProjectToResultPropagation - eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob - dhp-enrichment-${projectVersion}.jar - - 
--executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} - --driver-memory=${sparkDriverMemory} - --conf spark.extraListeners=${spark2ExtraListeners} - --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} - --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} - --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --conf spark.dynamicAllocation.enabled=true - --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - - --saveGraph${saveGraph} - --hive_metastore_uris${hive_metastore_uris} - --outputPath${outputPath}/relation - --potentialUpdatePath${workingDir}/preparedInfo/potentialUpdates - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked - - - - - - - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml deleted file mode 100644 index 2744ea92b..000000000 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/config-default.xml +++ /dev/null @@ -1,58 +0,0 @@ - - - jobTracker - yarnRM - - - nameNode - hdfs://nameservice1 - - - oozie.use.system.libpath - true - - - oozie.action.sharelib.for.spark - spark2 - - - hive_metastore_uris - thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 - - - spark2YarnHistoryServerAddress - http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 - - - spark2EventLogDir - /user/spark/spark2ApplicationHistory - - - spark2ExtraListeners - com.cloudera.spark.lineage.NavigatorAppListener - - - spark2SqlQueryExecutionListeners - com.cloudera.spark.lineage.NavigatorQueryListener - - - sparkExecutorNumber - 4 - - - sparkDriverMemory - 15G - - - sparkExecutorMemory - 6G - - - sparkExecutorCores - 1 - - - spark2MaxExecutors - 50 - - \ No newline at end of file diff 
--git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties new file mode 100644 index 000000000..05db04090 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/job.properties @@ -0,0 +1,28 @@ +sourcePath=/tmp/beta_provision/graph/10_graph_orcid_enriched +resumeFrom=ResultProject +allowedsemrelsorcidprop=isSupplementedBy;isSupplementTo +allowedsemrelsresultproject=isSupplementedBy;isSupplementTo +allowedsemrelscommunitysemrel=isSupplementedBy;isSupplementTo +datasourceWhitelistForCountryPropagation=10|opendoar____::16e6a3326dd7d868cbc926602a61e4d0;10|openaire____::fdb035c8b3e0540a8d9a561a6c44f4de;10|eurocrisdris::fe4903425d9040f680d8610d9079ea14;10|openaire____::5b76240cc27a58c6f7ceef7d8c36660e;10|openaire____::172bbccecf8fca44ab6a6653e84cb92a;10|openaire____::149c6590f8a06b46314eed77bfca693f;10|eurocrisdris::a6026877c1a174d60f81fd71f62df1c1;10|openaire____::4692342f0992d91f9e705c26959f09e0;10|openaire____::8d529dbb05ec0284662b391789e8ae2a;10|openaire____::345c9d171ef3c5d706d08041d506428c;10|opendoar____::1c1d4df596d01da60385f0bb17a4a9e0;10|opendoar____::7a614fd06c325499f1680b9896beedeb;10|opendoar____::1ee3dfcd8a0645a25a35977997223d22;10|opendoar____::d296c101daa88a51f6ca8cfc1ac79b50;10|opendoar____::798ed7d4ee7138d49b8828958048130a;10|openaire____::c9d2209ecc4d45ba7b4ca7597acb88a2;10|eurocrisdris::c49e0fe4b9ba7b7fab717d1f0f0a674d;10|eurocrisdris::9ae43d14471c4b33661fedda6f06b539;10|eurocrisdris::432ca599953ff50cd4eeffe22faf3e48 +#allowedtypes=pubsrepository::institutional +allowedtypes=Institutional +outputPath=/tmp/miriam/graph/11_graph_orcid +pathMap ={"author":"$['author'][*]['fullname']", \ + "title":"$['title'][*]['value']",\ + "orcid":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid')]['value']" ,\ + 
"orcid_pending":"$['author'][*]['pid'][*][?(@['qualifier']['classid']=='orcid_pending')]['value']" ,\ + "contributor" : "$['contributor'][*]['value']",\ + "description" : "$['description'][*]['value']",\ + "subject" :"$['subject'][*]['value']" , \ + "fos" : "$['subject'][?(@['qualifier']['classid']=='FOS')].value" ,\ + "sdg" : "$['subject'][?(@['qualifier']['classid']=='SDG')].value",\ + "journal":"$['journal'].name",\ + "hostedby":"$['instance'][*]['hostedby']['key']",\ + "collectedfrom":"$['instance'][*]['collectedfrom']['key']",\ + "publisher":"$['publisher'].value",\ + "publicationyear":"$['dateofacceptance'].value"} +blacklist=empty +allowedpids=orcid;orcid_pending +baseURL = https://services.openaire.eu/openaire/community/ +iterations=1 + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/config-default.xml new file mode 100644 index 000000000..d262cb6e0 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/import.txt b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/import.txt new file mode 100644 index 000000000..b20259414 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/import.txt @@ -0,0 +1,10 @@ +## This is a classpath-based import file (this header is required) +orcid_propagation classpath 
eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app +bulk_tagging classpath eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app +affiliation_inst_repo classpath eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app +entity_semantic_relation classpath eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app +community_organization classpath eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app +result_project classpath eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app +community_project classpath eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app +community_sem_rel classpath eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app +country_propagation classpath eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml new file mode 100644 index 000000000..8e91707b6 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/main/oozie_app/workflow.xml @@ -0,0 +1,324 @@ + + + + + sourcePath + the source path + + + allowedsemrelsorcidprop + the semantic relationships allowed for propagation + + + allowedsemrelsresultproject + the allowed semantics + + + allowedsemrelscommunitysemrel + the semantic relationships allowed for propagation + + + datasourceWhitelistForCountryPropagation + the white list + + + allowedtypes + the allowed types + + + outputPath + the output path + + + pathMap + the json path associated to each selection field + + + blacklist + list of datasources in blacklist for the affiliation from instrepo propagation + + + + hiveDbName + the target hive database name + + + hiveJdbcUrl + hive server jdbc url + + + hiveMetastoreUris + hive server metastore URIs + + + sparkDriverMemory + memory for 
driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + ${wf:conf('resumeFrom') eq 'BulkTagging'} + ${wf:conf('resumeFrom') eq 'AffiliationInstitutionalRepository'} + ${wf:conf('resumeFrom') eq 'AffiliationSemanticRelation'} + ${wf:conf('resumeFrom') eq 'CommunityOrganization'} + ${wf:conf('resumeFrom') eq 'ResultProject'} + ${wf:conf('resumeFrom') eq 'CommunityProject'} + ${wf:conf('resumeFrom') eq 'CommunitySemanticRelation'} + ${wf:conf('resumeFrom') eq 'CountryPropagation'} + + + + + + + + ${wf:appPath()}/orcid_propagation + + + + + sourcePath + ${sourcePath} + + + allowedsemrels + ${allowedsemrelsorcidprop} + + + outputPath + ${outputPath} + + + + + + + + + + ${wf:appPath()}/bulk_tagging + + + + + sourcePath + ${outputPath} + + + baseURL + ${baseURL} + + + pathMap + ${pathMap} + + + + + + + + + + ${wf:appPath()}/affiliation_inst_repo + + + + + sourcePath + ${outputPath} + + + blacklist + ${blacklist} + + + + + + + + + + ${wf:appPath()}/entity_semantic_relation + + + + + sourcePath + ${outputPath} + + + iterations + ${iterations} + + + + + + + + + + ${wf:appPath()}/community_organization + + + + + 
sourcePath + ${outputPath} + + + baseURL + ${baseURL} + + + + + + + + + + ${wf:appPath()}/result_project + + + + + sourcePath + ${outputPath} + + + allowedsemrels + ${allowedsemrelsresultproject} + + + + + + + + + + ${wf:appPath()}/community_project + + + + + sourcePath + ${outputPath} + + + + + + + + + + ${wf:appPath()}/community_sem_rel + + + + + sourcePath + ${outputPath} + + + allowedsemrels + ${allowedsemrelscommunitysemrel} + + + baseURL + ${baseURL} + + + + + + + + + + ${wf:appPath()}/country_propagation + + + + + sourcePath + ${outputPath} + + + whitelist + ${datasourceWhitelistForCountryPropagation} + + + allowedtypes + ${allowedtypes} + + + + + + + + + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_bulkTag_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/input_bulkTag_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml new file mode 100644 index 000000000..2695253e6 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/config-default.xml @@ -0,0 +1,62 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2ExtraListeners + 
com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 5G + + + sparkExecutorCores + 4 + + + memoryOverhead + 3G + + + partitions + 3284 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml similarity index 54% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml index 03373eda0..c7a9e8a26 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/bulktag/oozie_app/workflow.xml @@ -9,13 +9,14 @@ the json path associated to each selection field - outputPath - the output path + baseURL + The URL to access the community APIs - baseURL - the community API base URL + startFrom + undelete + @@ -29,71 +30,34 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + ${wf:conf('startFrom') eq 'undelete'} + + + + + - - + + - + - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - yarn-cluster cluster - bulkTagging-result + bulkTagging eu.dnetlib.dhp.bulktag.SparkBulkTagJob
dhp-enrichment-${projectVersion}.jar @@ -101,13 +65,15 @@ --executor-memory=${sparkExecutorMemory} --executor-cores=${sparkExecutorCores} --driver-memory=${sparkDriverMemory} + --conf spark.executor.memoryOverhead=${memoryOverhead} + --conf spark.sql.shuffle.partitions=${partitions} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${sourcePath}/ - --outputPath${outputPath}/ + --outputPath${workingDir}/bulktag/ --pathMap${pathMap} --baseURL${baseURL} @@ -115,6 +81,8 @@ + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_countrypropagation_parameters.json similarity index 99% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_countrypropagation_parameters.json index f217e2458..d3cde8b74 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_countrypropagation_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_countrypropagation_parameters.json @@ -29,4 +29,4 @@ "paramDescription": "true if the spark session is managed, false otherwise", "paramRequired": false } -] \ No newline at end of file +] diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareassoc_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareassoc_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareassoc_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareresultcountry_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/input_prepareresultcountry_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/input_prepareresultcountry_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml new file mode 100644 index 000000000..1cb0b8a5e --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/config-default.xml @@ -0,0 +1,58 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + oozie.action.sharelib.for.spark + spark2 + + + hive_metastore_uris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + spark2YarnHistoryServerAddress + http://iis-cdh5-test-gw.ocean.icm.edu.pl:18089 + + + spark2EventLogDir + /user/spark/spark2ApplicationHistory + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + + + spark2SqlQueryExecutionListeners + 
com.cloudera.spark.lineage.NavigatorQueryListener + + + sparkExecutorNumber + 4 + + + sparkDriverMemory + 15G + + + sparkExecutorMemory + 5G + + + sparkExecutorCores + 4 + + + spark2MaxExecutors + 50 + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml similarity index 72% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml index 271ccbf72..b9cf69517 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/countrypropagation/oozie_app/workflow.xml @@ -13,10 +13,9 @@ the allowed types - outputPath - the output path + startFrom + undelete - @@ -30,7 +29,15 @@ - + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -38,57 +45,13 @@ - - + + - + - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - @@ -110,20 +73,20 @@ --sourcePath${sourcePath} --whitelist${whitelist} --allowedtypes${allowedtypes} - --outputPath${workingDir}/preparedInfo + --outputPath${workingDir}/country/preparedInfo - + - - - - - + + + + + - + yarn cluster @@ -144,16 +107,16 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/publication - 
--outputPath${workingDir}/publication - --workingPath${workingDir}/workingP + --outputPath${workingDir}/country/publication + --workingPath${workingDir}/country/workingP --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --preparedInfoPath${workingDir}/preparedInfo + --preparedInfoPath${workingDir}/country/preparedInfo - + yarn cluster @@ -174,16 +137,16 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/dataset - --outputPath${workingDir}/dataset - --workingPath${workingDir}/workingD + --outputPath${workingDir}/country/dataset + --workingPath${workingDir}/country/workingD --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --preparedInfoPath${workingDir}/preparedInfo + --preparedInfoPath${workingDir}/country/preparedInfo - + yarn cluster @@ -204,16 +167,16 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/otherresearchproduct - --outputPath${workingDir}/otherresearchproduct - --workingPath${workingDir}/workingO + --outputPath${workingDir}/country/otherresearchproduct + --workingPath${workingDir}/country/workingO --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --preparedInfoPath${workingDir}/preparedInfo + --preparedInfoPath${workingDir}/country/preparedInfo - + yarn cluster @@ -234,25 +197,25 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/software - --outputPath${workingDir}/software - --workingPath${workingDir}/workingS + --outputPath${workingDir}/country/software + --workingPath${workingDir}/country/workingS --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --preparedInfoPath${workingDir}/preparedInfo + --preparedInfoPath${workingDir}/country/preparedInfo - + - - - - - + + + + + - + yarn cluster @@ -270,18 +233,18 @@ --conf spark.speculation=false --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 + --conf spark.sql.shuffle.partitions=7680 
--sourcePath${sourcePath}/publication - --preparedInfoPath${workingDir}/publication + --preparedInfoPath${workingDir}/country/publication --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${outputPath}/publication + --outputPath${workingDir}/country/country/publication - + yarn cluster @@ -302,15 +265,15 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/dataset - --preparedInfoPath${workingDir}/dataset + --preparedInfoPath${workingDir}/country/dataset --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${outputPath}/dataset + --outputPath${workingDir}/country/country/dataset - + yarn cluster @@ -331,15 +294,15 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/otherresearchproduct - --preparedInfoPath${workingDir}/otherresearchproduct + --preparedInfoPath${workingDir}/country/otherresearchproduct --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${outputPath}/otherresearchproduct + --outputPath${workingDir}/country/country/otherresearchproduct - + yarn cluster @@ -360,16 +323,57 @@ --conf spark.sql.shuffle.partitions=3840 --sourcePath${sourcePath}/software - --preparedInfoPath${workingDir}/software + --preparedInfoPath${workingDir}/country/software --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath}/software - + --outputPath${workingDir}/country/country/software + - + + + + yarn + cluster + community2resultfromorganization - move results + eu.dnetlib.dhp.MoveResult + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + 
--conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${workingDir}/country/country/ + --outputPath${sourcePath}/ + + + + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_preparation_parameter.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_preparation_parameter.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_preparation_parameter.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_propagation_parameter.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/input_propagation_parameter.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/input_propagation_parameter.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/countrypropagation/oozie_app/config-default.xml rename to 
dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml new file mode 100644 index 000000000..ecec3579b --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/entitytoorganizationfromsemrel/oozie_app/workflow.xml @@ -0,0 +1,112 @@ + + + + sourcePath + the source path + + + startFrom + undelete + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + yarn + cluster + PrepareResultProjectOrganizationAssociation + eu.dnetlib.dhp.entitytoorganizationfromsemrel.PrepareInfo + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --graphPath${sourcePath} + --hive_metastore_uris${hive_metastore_uris} + --leavesPath${workingDir}/entitiesSemanticRelation/preparedInfo/leavesPath + --childParentPath${workingDir}/entitiesSemanticRelation/preparedInfo/childParentPath + --resultOrgPath${workingDir}/entitiesSemanticRelation/preparedInfo/resultOrgPath + --projectOrganizationPath${workingDir}/entitiesSemanticRelation/preparedInfo/projectOrganizationPath + 
--relationPath${workingDir}/entitiesSemanticRelation/preparedInfo/relation + + + + + + + + yarn + cluster + entityToOrganizationFromSemRel + eu.dnetlib.dhp.entitytoorganizationfromsemrel.SparkEntityToOrganizationFromSemRel + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + --conf spark.sql.shuffle.partitions=3840 + + --relationPath${workingDir}/entitiesSemanticRelation/preparedInfo/relation + --outputPath${sourcePath}/relation + --leavesPath${workingDir}/entitiesSemanticRelation/preparedInfo/leavesPath + --childParentPath${workingDir}/entitiesSemanticRelation/preparedInfo/childParentPath + --resultOrgPath${workingDir}/entitiesSemanticRelation/preparedInfo/resultOrgPath + --projectOrganizationPath${workingDir}/entitiesSemanticRelation/preparedInfo/projectOrganizationPath + --hive_metastore_uris${hive_metastore_uris} + --workingDir${workingDir}/entitiesSemanticRelation/working + --iterations${iterations} + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eoscTag_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json similarity index 88% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eoscTag_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json index 4c25fea01..4645be435 100644 --- 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/bulktag/input_eoscTag_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/input_moveresult_parameters.json @@ -5,9 +5,10 @@ "paramDescription": "the path of the sequencial file to read", "paramRequired": true }, + { - "paramName": "wp", - "paramLongName": "workingPath", + "paramName": "out", + "paramLongName": "outputPath", "paramDescription": "the path used to store temporary output files", "paramRequired": true }, diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json similarity index 88% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json index d8aa7eb9a..3cbaa23bb 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_orcidtoresult_parameters.json @@ -11,12 +11,6 @@ "paramDescription": "true if the new version of the graph must be saved", "paramRequired": false }, - { - "paramName":"h", - "paramLongName":"hive_metastore_uris", - "paramDescription": "the hive metastore uris", - "paramRequired": true - }, { "paramName": "out", "paramLongName": "outputPath", diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json 
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/input_prepareorcidtoresult_parameters2.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/config-default.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml similarity index 92% rename from 
dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml index 5f52c1658..a9642d637 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/orcidtoresultfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/orcidtoresultfromsemrel/oozie_app/workflow.xml @@ -114,7 +114,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -142,7 +142,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -170,7 +170,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -198,7 +198,7 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/preparedInfo/targetOrcidAssoc + --outputPath${workingDir}/orcid/targetOrcidAssoc --allowedsemrels${allowedsemrels} @@ -225,8 +225,8 @@ --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --sourcePath${workingDir}/preparedInfo/targetOrcidAssoc - --outputPath${workingDir}/preparedInfo/mergedOrcidAssoc + --sourcePath${workingDir}/orcid/targetOrcidAssoc + 
--outputPath${workingDir}/orcid/mergedOrcidAssoc @@ -247,9 +247,10 @@ eu.dnetlib.dhp.orcidtoresultfromsemrel.SparkOrcidToResultFromSemRelJob dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=4 + --executor-memory=4G --driver-memory=${sparkDriverMemory} + --conf spark.executor.memoryOverhead=5G --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} @@ -259,11 +260,10 @@ --conf spark.speculation=false --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --conf spark.sql.shuffle.partitions=3840 + --conf spark.sql.shuffle.partitions=15000 - --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc --sourcePath${sourcePath}/publication - --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication --outputPath${outputPath}/publication @@ -292,9 +292,8 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc --sourcePath${sourcePath}/dataset - --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset --outputPath${outputPath}/dataset @@ -323,9 +322,8 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc --sourcePath${sourcePath}/otherresearchproduct - --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct 
--outputPath${outputPath}/otherresearchproduct @@ -354,9 +352,8 @@ --conf spark.hadoop.mapreduce.map.speculative=false --conf spark.hadoop.mapreduce.reduce.speculative=false - --possibleUpdatesPath${workingDir}/preparedInfo/mergedOrcidAssoc + --possibleUpdatesPath${workingDir}/orcid/mergedOrcidAssoc --sourcePath${sourcePath}/software - --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software --outputPath${outputPath}/software @@ -365,6 +362,15 @@ + + + + + + + + + diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_prepareprojecttoresult_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_prepareprojecttoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_prepareprojecttoresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_projecttoresult_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/input_projecttoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/input_projecttoresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/config-default.xml similarity index 100% rename from 
dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/projecttoresult/oozie_app/config-default.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml new file mode 100644 index 000000000..287ee4ba8 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/projecttoresult/oozie_app/workflow.xml @@ -0,0 +1,104 @@ + + + + sourcePath + the source path + + + allowedsemrels + the allowed semantics + + + startFrom + undelete + + + + + ${jobTracker} + ${nameNode} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + yarn + cluster + PrepareProjectResultsAssociation + eu.dnetlib.dhp.projecttoresult.PrepareProjectResultsAssociation + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --sourcePath${sourcePath}/relation + --allowedsemrels${allowedsemrels} + --hive_metastore_uris${hive_metastore_uris} + --potentialUpdatePath${workingDir}/resultproject/preparedInfo/potentialUpdates + --alreadyLinkedPath${workingDir}/resultproject/preparedInfo/alreadyLinked + + + + + + + + yarn + cluster + ProjectToResultPropagation + 
eu.dnetlib.dhp.projecttoresult.SparkResultToProjectThroughSemRelJob + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --hive_metastore_uris${hive_metastore_uris} + --outputPath${sourcePath}/relation + --potentialUpdatePath${workingDir}/resultproject/preparedInfo/potentialUpdates + --alreadyLinkedPath${workingDir}/resultproject/preparedInfo/alreadyLinked + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_communitytoresult_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_communitytoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_communitytoresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json similarity index 100% rename from 
dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/input_preparecommunitytoresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/entitytoorganizationfromsemrel/oozie_app/config-default.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml similarity index 61% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml index e342bce23..aa5357eea 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromorganization/oozie_app/workflow.xml @@ -5,12 +5,12 @@ the source path - outputPath - the output path + baseURL + the baseURL from where to reach the community APIs - baseURL - the community API base URL + startFrom + undelete @@ -25,7 +25,15 @@ - + + + + + 
${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -33,58 +41,13 @@ - - + + - + - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - yarn @@ -102,10 +65,11 @@ --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/relation - --outputPath${workingDir}/preparedInfo/resultCommunityList + --outputPath${workingDir}/communityorganization/preparedInfo/resultCommunityList --hive_metastore_uris${hive_metastore_uris} --baseURL${baseURL} @@ -132,16 +96,41 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --preparedInfoPath${workingDir}/preparedInfo/resultCommunityList + --preparedInfoPath${workingDir}/communityorganization/preparedInfo/resultCommunityList --sourcePath${sourcePath}/ - --outputPath${outputPath}/ + --outputPath${workingDir}/communityorganization/resulttocommunityfromorganization/ + + + + + + + + yarn + cluster + community2resultfromorganization - move results + eu.dnetlib.dhp.MoveResult + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf 
spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${workingDir}/communityorganization/resulttocommunityfromorganization/ + --outputPath${sourcePath}/ + - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_communitytoresult_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_communitytoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_communitytoresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/input_preparecommunitytoresult_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/config-default.xml similarity index 100% rename from 
dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromorganization/oozie_app/config-default.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml similarity index 61% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/workflow.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml index d0784c94d..0ceee5a7e 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromproject/oozie_app/workflow.xml @@ -5,12 +5,12 @@ the source path - outputPath - the output path + baseURL + the base URL to use to select the right community APIs - baseURL - the community API base URL + startFrom + undelete @@ -25,66 +25,28 @@ - + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] - - - + + - + - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - yarn @@ -106,7 +68,7 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/relation - 
--outputPath${workingDir}/preparedInfo/resultCommunityList + --outputPath${workingDir}/communitythroughproject/preparedInfo/resultCommunityList --baseURL${baseURL} @@ -132,16 +94,41 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --preparedInfoPath${workingDir}/preparedInfo/resultCommunityList + --preparedInfoPath${workingDir}/communitythroughproject/preparedInfo/resultCommunityList --sourcePath${sourcePath}/ - --outputPath${outputPath}/ + --outputPath${workingDir}/communitythroughproject/ + + + + + + + + yarn + cluster + move results + eu.dnetlib.dhp.MoveResult + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${workingDir}/communitythroughproject/ + --outputPath${sourcePath}/ + - - \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_communitytoresult_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_communitytoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_communitytoresult_parameters.json diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult2_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json similarity index 80% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json index 8c99da673..c6389ec8d 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/input_preparecommunitytoresult_parameters.json @@ -1,7 +1,7 @@ [ { - "paramName":"is", - "paramLongName":"isLookUpUrl", + "paramName":"bu", + "paramLongName":"baseURL", "paramDescription": "URL of the isLookUp Service", "paramRequired": true }, @@ -36,9 +36,9 @@ "paramRequired": true }, { - "paramName":"tn", - "paramLongName":"resultTableName", - "paramDescription": "the name of the result table we are currently working on", - 
"paramRequired": true - } + "paramName":"tn", + "paramLongName":"resultTableName", + "paramDescription": "the name of the result table we are currently working on", + "paramRequired": true + } ] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromproject/oozie_app/config-default.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml similarity index 71% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml index 81b51443c..b5e6fbf05 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttocommunityfromsemrel/oozie_app/workflow.xml @@ -9,16 +9,28 @@ the semantic relationships allowed for propagation - isLookUpUrl + baseURL the isLookup service endpoint outputPath the output path + + startFrom + undelete + - + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -26,66 +38,13 @@ - - + + - + - - - - - - - - - - 
${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${jobTracker} - ${nameNode} - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - - @@ -101,8 +60,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -114,9 +75,9 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${workingDir}/preparedInfo/targetCommunityAssoc + --outputPath${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc --allowedsemrels${allowedsemrels} - --isLookUpUrl${isLookUpUrl} + --baseURL${baseURL} @@ -130,8 +91,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -143,9 +106,9 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} 
--resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${workingDir}/preparedInfo/targetCommunityAssoc + --outputPath${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc --allowedsemrels${allowedsemrels} - --isLookUpUrl${isLookUpUrl} + --baseURL${baseURL} @@ -159,8 +122,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -172,9 +137,9 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${workingDir}/preparedInfo/targetCommunityAssoc + --outputPath${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc --allowedsemrels${allowedsemrels} - --isLookUpUrl${isLookUpUrl} + --baseURL${baseURL} @@ -188,8 +153,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep1 dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -201,9 +168,9 @@ --sourcePath${sourcePath} --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${workingDir}/preparedInfo/targetCommunityAssoc + --outputPath${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc --allowedsemrels${allowedsemrels} - 
--isLookUpUrl${isLookUpUrl} + --baseURL${baseURL} @@ -219,8 +186,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.PrepareResultCommunitySetStep2 dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -229,8 +198,8 @@ --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --sourcePath${workingDir}/preparedInfo/targetCommunityAssoc - --outputPath${workingDir}/preparedInfo/mergedCommunityAssoc + --sourcePath${workingDir}/communitysemrel/preparedInfo/targetCommunityAssoc + --outputPath${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc @@ -251,8 +220,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=4G + --conf spark.executor.memoryOverhead=5G + --conf spark.sql.shuffle.partitions=15000 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -261,12 +232,12 @@ --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc + --preparedInfoPath${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc --sourcePath${sourcePath}/publication --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication - --outputPath${outputPath}/publication - --saveGraph${saveGraph} + 
--outputPath${workingDir}/communitysemrel/publication + @@ -280,8 +251,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -290,12 +263,12 @@ --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc + --preparedInfoPath${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc --sourcePath${sourcePath}/dataset --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset - --outputPath${outputPath}/dataset - --saveGraph${saveGraph} + --outputPath${workingDir}/communitysemrel/dataset + @@ -309,8 +282,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -319,12 +294,12 @@ --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc + --preparedInfoPath${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc --sourcePath${sourcePath}/otherresearchproduct --hive_metastore_uris${hive_metastore_uris} 
--resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct - --outputPath${outputPath}/otherresearchproduct - --saveGraph${saveGraph} + --outputPath${workingDir}/communitysemrel/otherresearchproduct + @@ -338,8 +313,10 @@ eu.dnetlib.dhp.resulttocommunityfromsemrel.SparkResultToCommunityThroughSemRelJob dhp-enrichment-${projectVersion}.jar - --executor-cores=${sparkExecutorCores} - --executor-memory=${sparkExecutorMemory} + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 --driver-memory=${sparkDriverMemory} --conf spark.extraListeners=${spark2ExtraListeners} --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} @@ -348,18 +325,45 @@ --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} - --preparedInfoPath${workingDir}/preparedInfo/mergedCommunityAssoc + --preparedInfoPath${workingDir}/communitysemrel/preparedInfo/mergedCommunityAssoc --sourcePath${sourcePath}/software --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software - --outputPath${outputPath}/software - --saveGraph${saveGraph} + --outputPath${workingDir}/communitysemrel/software + - + + + + + yarn + cluster + move results + eu.dnetlib.dhp.MoveResult + dhp-enrichment-${projectVersion}.jar + + --executor-cores=6 + --executor-memory=5G + --conf spark.executor.memoryOverhead=3g + --conf spark.sql.shuffle.partitions=3284 + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --sourcePath${workingDir}/communitysemrel/ + --outputPath${sourcePath}/ + + + + + diff --git 
a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_newrelation_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_newrelation_parameters.json new file mode 100644 index 000000000..5fe92cff1 --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_newrelation_parameters.json @@ -0,0 +1,20 @@ +[ + { + "paramName":"s", + "paramLongName":"sourcePath", + "paramDescription": "the path of the sequential file to read", + "paramRequired": true + }, + + { + "paramName": "ssm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "true if the spark session is managed, false otherwise", + "paramRequired": false + },{ + "paramName": "o", + "paramLongName": "outputPath", + "paramDescription": "the path where the new relations are written", + "paramRequired": false +} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json similarity index 66% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json index 2f00bacae..3f4b1d151 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json +++
b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_prepareresultorg_parameters.json @@ -11,16 +11,11 @@ "paramDescription": "the hive metastore uris", "paramRequired": true }, + { - "paramName":"dop", - "paramLongName":"datasourceOrganizationPath", - "paramDescription": "path where to store/find association from datasource and organization", - "paramRequired": true - }, - { - "paramName":"alp", - "paramLongName":"alreadyLinkedPath", - "paramDescription": "path where to store/find already linked results and organizations", + "paramName":"wp", + "paramLongName":"workingPath", + "paramDescription": "the working path", "paramRequired": true }, { diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/input_propagationresulaffiliationfrominstrepo_parameters.json diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/config-default.xml similarity index 100% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttocommunityfromsemrel/oozie_app/config-default.xml rename to 
dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/config-default.xml diff --git a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml similarity index 67% rename from dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml rename to dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml index edfff8817..ca76a0e85 100644 --- a/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/resulttoorganizationfrominstrepo/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-enrichment/src/main/resources/eu/dnetlib/dhp/wf/subworkflows/resulttoorganizationfrominstrepo/oozie_app/workflow.xml @@ -5,8 +5,12 @@ the source path - outputPath - sets the outputPath + blacklist + The list of institutional repositories that should not be used for the propagation + + + startFrom + undelete @@ -21,7 +25,15 @@ - + + + + + ${wf:conf('startFrom') eq 'undelete'} + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] @@ -29,97 +41,13 @@ - - + + - + - - - - - - - - - - - - - - ${nameNode}/${sourcePath}/relation - ${nameNode}/${outputPath}/relation - - - - - - - - ${nameNode}/${sourcePath}/publication - ${nameNode}/${outputPath}/publication - - - - - - - - ${nameNode}/${sourcePath}/dataset - ${nameNode}/${outputPath}/dataset - - - - - - - - ${nameNode}/${sourcePath}/otherresearchproduct - ${nameNode}/${outputPath}/otherresearchproduct - - - - - - - - ${nameNode}/${sourcePath}/software - ${nameNode}/${outputPath}/software - - - - - - - - ${nameNode}/${sourcePath}/organization - ${nameNode}/${outputPath}/organization - - - - - - - - 
${nameNode}/${sourcePath}/project - ${nameNode}/${outputPath}/project - - - - - - - - ${nameNode}/${sourcePath}/datasource - ${nameNode}/${outputPath}/datasource - - - - - - @@ -138,10 +66,9 @@ --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} --sourcePath${sourcePath} - --hive_metastore_uris${hive_metastore_uris} - --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked + --workingPath${workingDir}/affiliationInstRepo --blacklist${blacklist} + --hive_metastore_uris${hive_metastore_uris} @@ -173,9 +100,9 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/publication - --outputPath${outputPath}/relation - --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked + --outputPath${workingDir}/affiliationInstRepo/publication/relation + --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization + --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Publication @@ -202,9 +129,9 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/dataset - --outputPath${outputPath}/relation - --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked + --outputPath${workingDir}/affiliationInstRepo/dataset/relation + --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization + --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Dataset @@ -231,9 +158,9 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} 
--sourcePath${sourcePath}/otherresearchproduct - --outputPath${outputPath}/relation - --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked + --outputPath${workingDir}/affiliationInstRepo/otherresearchproduct/relation + --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization + --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.OtherResearchProduct @@ -260,9 +187,9 @@ --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} --sourcePath${sourcePath}/software - --outputPath${outputPath}/relation - --datasourceOrganizationPath${workingDir}/preparedInfo/datasourceOrganization - --alreadyLinkedPath${workingDir}/preparedInfo/alreadyLinked + --outputPath${workingDir}/affiliationInstRepo/software/relation + --datasourceOrganizationPath${workingDir}/affiliationInstRepo/preparedInfo/datasourceOrganization + --alreadyLinkedPath${workingDir}/affiliationInstRepo/preparedInfo/alreadyLinked --hive_metastore_uris${hive_metastore_uris} --resultTableNameeu.dnetlib.dhp.schema.oaf.Software @@ -270,7 +197,32 @@ - + + + + + yarn + cluster + append new relations + eu.dnetlib.dhp.resulttoorganizationfrominstrepo.AppendNewRelations + dhp-enrichment-${projectVersion}.jar + + --executor-cores=${sparkExecutorCores} + --executor-memory=${sparkExecutorMemory} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.dynamicAllocation.enabled=true + --conf spark.dynamicAllocation.maxExecutors=${spark2MaxExecutors} + + --outputPath${sourcePath}/relation + 
--sourcePath${workingDir}/affiliationInstRepo/ + + + + diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java index 517a20cd9..db917658a 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/entitytoorganizationfromsemrel/SparkJobTest.java @@ -114,7 +114,7 @@ public class SparkJobTest { .option("compression", "gzip") .json(workingDir.toString() + "/projectInput"); - SparkResultToOrganizationFromSemRel + SparkEntityToOrganizationFromSemRel .main( new String[] { @@ -395,7 +395,7 @@ public class SparkJobTest { .option("compression", "gzip") .json(workingDir.toString() + "/projectInput"); - SparkResultToOrganizationFromSemRel + SparkEntityToOrganizationFromSemRel .main( new String[] { @@ -678,7 +678,7 @@ public class SparkJobTest { .option("compression", "gzip") .json(workingDir.toString() + "/projectInput"); - SparkResultToOrganizationFromSemRel + SparkEntityToOrganizationFromSemRel .main( new String[] { diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java index a63296d18..eee518353 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OafToOafMapper.java @@ -11,6 +11,7 @@ import java.util.List; import java.util.Set; import java.util.stream.Collectors; +import org.apache.commons.lang3.ObjectUtils; import org.apache.commons.lang3.StringUtils; import org.dom4j.Document; import org.dom4j.Element; @@ -27,6 +28,15 @@ import eu.dnetlib.dhp.schema.oaf.utils.ModelHardLimits; public class OafToOafMapper 
extends AbstractMdRecordToOafMapper { + private static Set DC_TYPE_PUBLICATION_VERSION = new HashSet<>(); + + static { + DC_TYPE_PUBLICATION_VERSION.add("info:eu-repo/semantics/submittedVersion"); + DC_TYPE_PUBLICATION_VERSION.add("info:eu-repo/semantics/acceptedVersion"); + DC_TYPE_PUBLICATION_VERSION.add("info:eu-repo/semantics/publishedVersion"); + DC_TYPE_PUBLICATION_VERSION.add("info:eu-repo/semantics/updatedVersion"); + } + public OafToOafMapper(final VocabularyGroup vocs, final boolean invisible, final boolean shouldHashId, final boolean forceOrginalId) { super(vocs, invisible, shouldHashId, forceOrginalId); @@ -192,24 +202,40 @@ public class OafToOafMapper extends AbstractMdRecordToOafMapper { /** * The Dublin Core element dc:type can be repeated, but we need to base our mapping on a single value * So this method tries to give precedence to the COAR resource type, when available. Otherwise, it looks for the - * openaire's info:eu-repo type, and as last resort picks the 1st type text available + * openaire's info:eu-repo type, but excluding the following + * + * info:eu-repo/semantics/draft + * info:eu-repo/semantics/submittedVersion + * info:eu-repo/semantics/acceptedVersion + * info:eu-repo/semantics/publishedVersion + * info:eu-repo/semantics/updatedVersion + * + * Then, it picks the 1st dc:type text available and, in case there is no dc:type element, as last resort it tries + * to extract the type from the dr:CobjCategory element + * + * Examples: * * http://purl.org/coar/resource_type/c_5794 * info:eu-repo/semantics/article * Conference article + * 0006 * * @param doc the input document * @return the chosen resource type */ @Override protected String findOriginalType(Document doc) { - return (String) doc + final String dcType = (String) doc .selectNodes("//dc:type") .stream() .map(o -> "" + ((Node) o).getText().trim()) + .filter(t -> !DC_TYPE_PUBLICATION_VERSION.contains(t)) .sorted(new OriginalTypeComparator()) .findFirst() .orElse(null); + + final 
String drCobjCategory = doc.valueOf("//dr:CobjCategory/text()"); + return ObjectUtils.firstNonNull(dcType, drCobjCategory); } @Override diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java index e63b01a00..57e0d2955 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java +++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/OdfToOafMapper.java @@ -221,27 +221,48 @@ public class OdfToOafMapper extends AbstractMdRecordToOafMapper { } /** - * The Datacite element + * Extracts the resource type from The Datacite element * - * journal article + * journal article * * @param doc the input document * @return the chosen resource type */ @Override protected String findOriginalType(Document doc) { - return Optional + final String resourceType = Optional .ofNullable( (Element) doc .selectSingleNode( "//*[local-name()='metadata']/*[local-name() = 'resource']/*[local-name() = 'resourceType']")) - .map(element -> { - final String resourceTypeURI = element.attributeValue("anyURI"); - final String resourceTypeTxt = element.getText(); + .map(e -> { + final String resourceTypeURI = Optional + .ofNullable(e.attributeValue("uri")) + .filter(StringUtils::isNotBlank) + .orElse(null); + final String resourceTypeAnyURI = Optional + .ofNullable(e.attributeValue("anyURI")) + .filter(StringUtils::isNotBlank) + .orElse(null); + final String resourceTypeTxt = Optional + .ofNullable(e.getText()) + .filter(StringUtils::isNotBlank) + .orElse(null); + final String resourceTypeGeneral = Optional + .ofNullable(e.attributeValue("resourceTypeGeneral")) + .filter(StringUtils::isNotBlank) + .orElse(null); - return ObjectUtils.firstNonNull(resourceTypeURI, resourceTypeTxt); + return ObjectUtils + .firstNonNull(resourceTypeURI, resourceTypeAnyURI, resourceTypeTxt, resourceTypeGeneral); 
}) .orElse(null); + + final String drCobjCategory = doc.valueOf("//dr:CobjCategory/text()"); + return ObjectUtils.firstNonNull(resourceType, drCobjCategory); } @Override diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml index ce117b5e9..3493ecb2f 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/enrich/orcid/oozie_app/workflow.xml @@ -43,6 +43,17 @@ --graphPath${graphPath} --masteryarn + + + + + + + + + + + diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql index 149c326fa..748f77b27 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hive/oozie_app/lib/scripts/postprocessing.sql @@ -1,10 +1,10 @@ DROP VIEW IF EXISTS ${hiveDbName}.result; CREATE VIEW IF NOT EXISTS ${hiveDbName}.result as - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.publication p + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, metaresourcetype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, 
instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.publication p union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.dataset d + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, metaresourcetype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.dataset d union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.software s + select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, metaresourcetype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.software s union all - select id, originalid, dateofcollection, title, publisher, bestaccessright, datainfo, collectedfrom, pid, author, resulttype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.otherresearchproduct o; + select id, originalid, dateofcollection, title, publisher, bestaccessright, 
datainfo, collectedfrom, pid, author, resulttype, metaresourcetype, language, country, subject, description, dateofacceptance, relevantdate, embargoenddate, resourcetype, context, externalreference, instance, measures, processingchargeamount, eoscifguidelines from ${hiveDbName}.otherresearchproduct o; diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/download.sh b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/download.sh index 35220bd8c..9877fe7de 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/download.sh +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/hostedbymap/oozie_app/download.sh @@ -1,3 +1,3 @@ #!/bin/bash curl -LSs $1 | hdfs dfs -put - $2/$3 -curl -LSs http://api.crossref.org/works/10.1099/jgv.0.001453 > prova.txt \ No newline at end of file +#curl -LSs http://api.crossref.org/works/10.1099/jgv.0.001453 > prova.txt \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java index ac0435ce2..a6bbd30cf 100644 --- a/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java +++ b/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/oa/graph/raw/MappersTest.java @@ -1171,6 +1171,34 @@ class MappersTest { } + @Test + void test_Zenodo2() throws IOException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("odf_zenodo2.xml"))); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + + assertEquals(3, list.size()); + Publication p = cleanup((Publication) list.get(0), vocs); + + assertNotNull(p.getInstance()); + assertEquals(1, p.getInstance().size()); + + final Instance instance = 
p.getInstance().get(0); + + assertNotNull(instance.getInstanceTypeMapping()); + assertEquals(1, instance.getInstanceTypeMapping().size()); + + Optional coarType = instance + .getInstanceTypeMapping() + .stream() + .filter(itm -> ModelConstants.OPENAIRE_COAR_RESOURCE_TYPES_3_1.equals(itm.getVocabularyName())) + .findFirst(); + + assertTrue(coarType.isPresent()); + assertNotNull(coarType.get().getOriginalType()); + assertNull(coarType.get().getTypeCode()); + assertNull(coarType.get().getTypeLabel()); + } + @Test void testROHub2() throws IOException { final String xml = IOUtils @@ -1229,7 +1257,7 @@ class MappersTest { } @Test - public void testD4ScienceTraining() throws IOException { + void testD4ScienceTraining() throws IOException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("d4science-1-training.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -1240,7 +1268,7 @@ class MappersTest { } @Test - public void testD4ScienceDataset() throws IOException { + void testD4ScienceDataset() throws IOException { final String xml = IOUtils .toString(Objects.requireNonNull(getClass().getResourceAsStream("d4science-2-dataset.xml"))); final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); @@ -1250,6 +1278,21 @@ class MappersTest { System.out.println("***************"); } + @Test + void testIRISPub() throws IOException, DocumentException { + final String xml = IOUtils.toString(Objects.requireNonNull(getClass().getResourceAsStream("iris-odf.xml"))); + final List list = new OdfToOafMapper(vocs, false, true).processMdRecord(xml); + System.out.println("***************"); + System.out.println(new ObjectMapper().writeValueAsString(list)); + System.out.println("***************"); + final Publication p = (Publication) list.get(0); + assertNotNull(p.getInstance().get(0).getUrl().get(0)); + assertValidId(p.getId()); + System.out.println(p.getInstance().get(0).getUrl()); + 
p.getPid().forEach(x -> System.out.println(x.getValue())); + p.getInstance().get(0).getAlternateIdentifier().forEach(x -> System.out.println(x.getValue())); + + } @Test void testNotWellFormed() throws IOException { final String xml = IOUtils diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/iris-odf.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/iris-odf.xml new file mode 100644 index 000000000..1b875ec36 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/iris-odf.xml @@ -0,0 +1,215 @@ + + + + oai:air.unimi.it:2434/907506 + 2024-01-04T12:42:51Z + com_2434_73555 + col_2434_73557 + openaire + 2024-01-29T16:56:50.632Z + + od______1261::ff2d9e058e7bea90a27f41c31078e601 + oai:air.unimi.it:2434/907506 + + + + + od______1261 + + + + + Ensuring tests of conservation interventions build on existing literature + + + + W.J. Sutherland + + + S.T. Alvarez-Castaneda + + + T. Amano + + + R. Ambrosini + + + P. Atkinson + + + J.M. Baxter + + + A.L. Bond + + + P.J. Boon + + + K.L. Buchanan + + + J. Barlow + + + G. Bogliani + + + O.M. Bragg + + + M. Burgman + + + M.W. Cadotte + + + M. Calver + + + S.J. Cooke + + + R.T. Corlett + + + V. Devictor + + + J.G. Ewen + + + M. Fisher + + + G. Freeman + + + E. Game + + + B.J. Godley + + + C. Gortazar + + + I.R. Hartley + + + D.L. Hawksworth + + + K.A. Hobson + + + M.-. Lu + + + B. Martin-Lopez + + + K. Ma + + + A. Machado + + + D. Mae + + + M. Mangiacotti + + + D.J. Mccafferty + + + V. Melfi + + + S. Molur + + + A.J. Moore + + + S.D. Murphy + + + D. Norri + + + A.P.E. van Oudenhoven + + + J. Power + + + E.C. Ree + + + M.W. Schwartz + + + I. Storch + + + C. 
Wordley + + + + + + 2020 + 2020 + 2022-06-20 + + eng + Wiley Blackwell Publishing + journal article + application/pdf + 2434/907506 + open access + + Conservation of Natural Resources + + + + + 191802 bytes + + https://air.unimi.it/bitstream/2434/907506/4/Full%20manuscript%20resubmitted.pdf + + 10.1111/cobi.13555 + 32779884 + https://air.unimi.it/bitstream/2434/907506/4/Full%20manuscript%20resubmitted.pdf + 0001 + 2020-01-01 + OPEN + eng + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_zenodo2.xml b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_zenodo2.xml new file mode 100644 index 000000000..ebe105de8 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/resources/eu/dnetlib/dhp/oa/graph/raw/odf_zenodo2.xml @@ -0,0 +1,59 @@ + + +
+ oai:zenodo.org:1596086 + 2020-01-20T13:50:28Z + openaire + 2024-02-08T11:03:10.994Z + od______2659::036d5555a6688ed00c8d0da97bdece3b + 2024-02-08T11:03:10.994Z + 2024-02-08T11:03:10.994Z +
+ + + https://zenodo.org/record/1596086 + + + + Bonney, T. G. + T. G. + Bonney + + + + Ice Blocks on a Moraine + + Zenodo + 1889 + + 1889-08-22 + + + + 10.1038/040391a0 + + + Creative Commons Zero v1.0 Universal + Open Access + + + n/a + + + 0001 + 1889-08-22 + OPEN + http://creativecommons.org/publicdomain/zero/1.0/legalcode + + + + +
diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java index 518f41120..6f43ca3f7 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/XmlConverterJob.java @@ -62,8 +62,8 @@ public class XmlConverterJob { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); - final String isLookupUrl = parser.get("isLookupUrl"); - log.info("isLookupUrl: {}", isLookupUrl); + final String contextApiBaseUrl = parser.get("contextApiBaseUrl"); + log.info("contextApiBaseUrl: {}", contextApiBaseUrl); final SparkConf conf = new SparkConf(); conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); @@ -71,7 +71,7 @@ public class XmlConverterJob { runWithSparkSession(conf, isSparkSessionManaged, spark -> { removeOutputDir(spark, outputPath); - convertToXml(spark, inputPath, outputPath, ContextMapper.fromIS(isLookupUrl)); + convertToXml(spark, inputPath, outputPath, ContextMapper.fromAPI(contextApiBaseUrl)); }); } diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java index bcaf40603..083dbe988 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/ContextMapper.java @@ -1,18 +1,22 @@ package eu.dnetlib.dhp.oa.provision.utils; -import java.io.Serializable; -import java.io.StringReader; +import java.io.*; +import java.net.HttpURLConnection; +import java.net.URL; import java.util.HashMap; import org.dom4j.Document; import 
org.dom4j.DocumentException; import org.dom4j.Node; import org.dom4j.io.SAXReader; +import org.jetbrains.annotations.NotNull; import org.xml.sax.SAXException; import com.google.common.base.Joiner; +import eu.dnetlib.dhp.common.api.context.*; +import eu.dnetlib.dhp.common.rest.DNetRestClient; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -23,6 +27,45 @@ public class ContextMapper extends HashMap implements Serial private static final String XQUERY = "for $x in //RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ContextDSResourceType']//*[name()='context' or name()='category' or name()='concept'] return "; + public static ContextMapper fromAPI(final String baseURL) throws Exception { + + final ContextMapper contextMapper = new ContextMapper(); + + for (ContextSummary ctx : DNetRestClient + .doGET(String.format("%s/contexts", baseURL), ContextSummaryList.class)) { + + contextMapper.put(ctx.getId(), new ContextDef(ctx.getId(), ctx.getLabel(), "context", ctx.getType())); + + for (CategorySummary cat : DNetRestClient + .doGET(String.format("%s/context/%s?all=true", baseURL, ctx.getId()), CategorySummaryList.class)) { + contextMapper.put(cat.getId(), new ContextDef(cat.getId(), cat.getLabel(), "category", "")); + if (cat.isHasConcept()) { + for (ConceptSummary c : DNetRestClient + .doGET( + String.format("%s/context/category/%s?all=true", baseURL, cat.getId()), + ConceptSummaryList.class)) { + contextMapper.put(c.getId(), new ContextDef(c.getId(), c.getLabel(), "concept", "")); + if (c.isHasSubConcept()) { + for (ConceptSummary cs : c.getConcepts()) { + contextMapper.put(cs.getId(), new ContextDef(cs.getId(), cs.getLabel(), "concept", "")); + if (cs.isHasSubConcept()) { + for (ConceptSummary css : cs.getConcepts()) { + contextMapper + .put( + css.getId(), + new ContextDef(css.getId(), css.getLabel(), "concept", "")); + } + } + } + } + } + } + } + } + 
return contextMapper; + } + + @Deprecated public static ContextMapper fromIS(final String isLookupUrl) throws DocumentException, ISLookUpException, SAXException { ISLookUpService isLookUp = ISLookupClientFactory.getLookUpService(isLookupUrl); @@ -32,6 +75,7 @@ public class ContextMapper extends HashMap implements Serial return fromXml(sb.toString()); } + @Deprecated public static ContextMapper fromXml(final String xml) throws DocumentException, SAXException { final ContextMapper contextMapper = new ContextMapper(); diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json index eda6154d7..653a69ed1 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/input_params_xml_converter.json @@ -12,9 +12,9 @@ "paramRequired": true }, { - "paramName": "ilu", - "paramLongName": "isLookupUrl", - "paramDescription": "URL of the isLookUp Service", + "paramName": "cau", + "paramLongName": "contextApiBaseUrl", + "paramDescription": "URL of the context API", "paramRequired": true } ] diff --git a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml index 2e7b11dde..9eab960f0 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-provision/src/main/resources/eu/dnetlib/dhp/oa/provision/oozie_app/workflow.xml @@ -9,6 +9,10 @@ isLookupUrl URL for the isLookup service + + contextApiBaseUrl + context API URL + relPartitions number or partitions for the relations Dataset @@ -589,7 +593,7 @@ 
--inputPath${workingDir}/join_entities --outputPath${workingDir}/xml - --isLookupUrl${isLookupUrl} + --contextApiBaseUrl${contextApiBaseUrl} diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java index e07ba1b4e..e72883055 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/IndexRecordTransformerTest.java @@ -82,7 +82,7 @@ public class IndexRecordTransformerTest { void testPeerReviewed() throws IOException, TransformerException { final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, - XmlConverterJob.schemaLocation); + XmlConverterJob.schemaLocation); final Publication p = load("publication.json", Publication.class); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java index b62acbac3..a3a140cf6 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlIndexingJobTest.java @@ -1,6 +1,8 @@ package eu.dnetlib.dhp.oa.provision; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.io.IOException; import java.io.StringReader; import java.net.URI; @@ -32,8 +34,6 @@ import eu.dnetlib.dhp.oa.provision.utils.ISLookupClient; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; -import static org.junit.jupiter.api.Assertions.assertEquals; - @ExtendWith(MockitoExtension.class) public class XmlIndexingJobTest extends SolrTest { @@ -110,34 
+110,33 @@ public class XmlIndexingJobTest extends SolrTest { QueryResponse rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "*:*")); assertEquals( - nRecord, rsp.getResults().getNumFound(), - "the number of indexed records should be equal to the number of input records"); - + nRecord, rsp.getResults().getNumFound(), + "the number of indexed records should be equal to the number of input records"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "isgreen:true")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having isgreen = true"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having isgreen = true"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "openaccesscolor:bronze")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having openaccesscolor = bronze"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having openaccesscolor = bronze"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "isindiamondjournal:true")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having isindiamondjournal = true"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having isindiamondjournal = true"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "publiclyfunded:true")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having publiclyfunded = true"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having publiclyfunded = true"); rsp = miniCluster.getSolrClient().query(new SolrQuery().add(CommonParams.Q, "peerreviewed:true")); assertEquals( - 0, rsp.getResults().getNumFound(), - "the number of indexed records having peerreviewed = true"); + 0, rsp.getResults().getNumFound(), + "the number of indexed records having 
peerreviewed = true"); } @Test diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index de69795f8..ef6370bf3 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -244,4 +244,27 @@ public class XmlRecordFactoryTest { } + @Test + public void testIrisGuidelines4() throws DocumentException, IOException { + final ContextMapper contextMapper = new ContextMapper(); + + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + XmlConverterJob.schemaLocation); + + final Publication p = OBJECT_MAPPER + .readValue( + IOUtils.toString(getClass().getResourceAsStream("iris-odf-4.json")), + Publication.class); + + final String xml = xmlRecordFactory.build(new JoinedEntity<>(p)); + + assertNotNull(xml); + + final Document doc = new SAXReader().read(new StringReader(xml)); + + assertNotNull(doc); + System.out.println(doc.asXML()); + + } + } diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/iris-odf-4.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/iris-odf-4.json new file mode 100644 index 000000000..4c5ec9708 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/iris-odf-4.json @@ -0,0 +1 @@ +{"collectedfrom":[{"key":"10|opendoar____::17326d10d511828f6b34fa6d751739e2","value":"Archivio Istituzionale della Ricerca dell'Università degli Studi di 
Milano","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1706638577436,"id":"50|od______1261::ff2d9e058e7bea90a27f41c31078e601","originalId":["oai:air.unimi.it:2434/907506","50|od______1261::ff2d9e058e7bea90a27f41c31078e601"],"pid":[{"value":"2434/907506","qualifier":{"classid":"handle","classname":"Handle","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"","dateoftransformation":"2024-01-29T16:56:50.632Z","extraInfo":[],"oaiprovenance":null,"measures":null,"processingchargeamount":null,"processingchargecurrency":null,"author":[{"fullname":"W.J. Sutherland","name":"W. J.","surname":"Sutherland","rank":1,"pid":[],"affiliation":[]},{"fullname":"S.T. Alvarez-Castaneda","name":"S. T.","surname":"Alvarez-Castaneda","rank":2,"pid":[],"affiliation":[]},{"fullname":"T. Amano","name":"T.","surname":"Amano","rank":3,"pid":[],"affiliation":[]},{"fullname":"R. Ambrosini","name":"R.","surname":"Ambrosini","rank":4,"pid":[],"affiliation":[]},{"fullname":"P. Atkinson","name":"P.","surname":"Atkinson","rank":5,"pid":[],"affiliation":[]},{"fullname":"J.M. Baxter","name":"J. M.","surname":"Baxter","rank":6,"pid":[],"affiliation":[]},{"fullname":"A.L. Bond","name":"A. L.","surname":"Bond","rank":7,"pid":[],"affiliation":[]},{"fullname":"P.J. Boon","name":"P. J.","surname":"Boon","rank":8,"pid":[],"affiliation":[]},{"fullname":"K.L. Buchanan","name":"K. 
L.","surname":"Buchanan","rank":9,"pid":[],"affiliation":[]},{"fullname":"J. Barlow","name":"J.","surname":"Barlow","rank":10,"pid":[],"affiliation":[]},{"fullname":"G. Bogliani","name":"G.","surname":"Bogliani","rank":11,"pid":[],"affiliation":[]},{"fullname":"O.M. Bragg","name":"O. M.","surname":"Bragg","rank":12,"pid":[],"affiliation":[]},{"fullname":"M. Burgman","name":"M.","surname":"Burgman","rank":13,"pid":[],"affiliation":[]},{"fullname":"M.W. Cadotte","name":"M. W.","surname":"Cadotte","rank":14,"pid":[],"affiliation":[]},{"fullname":"M. Calver","name":"M.","surname":"Calver","rank":15,"pid":[],"affiliation":[]},{"fullname":"S.J. Cooke","name":"S. J.","surname":"Cooke","rank":16,"pid":[],"affiliation":[]},{"fullname":"R.T. Corlett","name":"R. T.","surname":"Corlett","rank":17,"pid":[],"affiliation":[]},{"fullname":"V. Devictor","name":"V.","surname":"Devictor","rank":18,"pid":[],"affiliation":[]},{"fullname":"J.G. Ewen","name":"J. G.","surname":"Ewen","rank":19,"pid":[],"affiliation":[]},{"fullname":"M. Fisher","name":"M.","surname":"Fisher","rank":20,"pid":[],"affiliation":[]},{"fullname":"G. Freeman","name":"G.","surname":"Freeman","rank":21,"pid":[],"affiliation":[]},{"fullname":"E. Game","name":"E.","surname":"Game","rank":22,"pid":[],"affiliation":[]},{"fullname":"B.J. Godley","name":"B. J.","surname":"Godley","rank":23,"pid":[],"affiliation":[]},{"fullname":"C. Gortazar","name":"C.","surname":"Gortazar","rank":24,"pid":[],"affiliation":[]},{"fullname":"I.R. Hartley","name":"I. R.","surname":"Hartley","rank":25,"pid":[],"affiliation":[]},{"fullname":"D.L. Hawksworth","name":"D. L.","surname":"Hawksworth","rank":26,"pid":[],"affiliation":[]},{"fullname":"K.A. Hobson","name":"K. A.","surname":"Hobson","rank":27,"pid":[],"affiliation":[]},{"fullname":"M.-. Lu","name":"M. -.","surname":"Lu","rank":28,"pid":[],"affiliation":[]},{"fullname":"B. Martin-Lopez","name":"B.","surname":"Martin-Lopez","rank":29,"pid":[],"affiliation":[]},{"fullname":"K. 
Ma","name":"K.","surname":"Ma","rank":30,"pid":[],"affiliation":[]},{"fullname":"A. Machado","name":"A.","surname":"Machado","rank":31,"pid":[],"affiliation":[]},{"fullname":"D. Mae","name":"D.","surname":"Mae","rank":32,"pid":[],"affiliation":[]},{"fullname":"M. Mangiacotti","name":"M.","surname":"Mangiacotti","rank":33,"pid":[],"affiliation":[]},{"fullname":"D.J. Mccafferty","name":"D. J.","surname":"Mccafferty","rank":34,"pid":[],"affiliation":[]},{"fullname":"V. Melfi","name":"V.","surname":"Melfi","rank":35,"pid":[],"affiliation":[]},{"fullname":"S. Molur","name":"S.","surname":"Molur","rank":36,"pid":[],"affiliation":[]},{"fullname":"A.J. Moore","name":"A. J.","surname":"Moore","rank":37,"pid":[],"affiliation":[]},{"fullname":"S.D. Murphy","name":"S. D.","surname":"Murphy","rank":38,"pid":[],"affiliation":[]},{"fullname":"D. Norri","name":"D.","surname":"Norri","rank":39,"pid":[],"affiliation":[]},{"fullname":"A.P.E. van Oudenhoven","name":"A. P. E.","surname":"Oudenhoven","rank":40,"pid":[],"affiliation":[]},{"fullname":"J. Power","name":"J.","surname":"Power","rank":41,"pid":[],"affiliation":[]},{"fullname":"E.C. Ree","name":"E. C.","surname":"Ree","rank":42,"pid":[],"affiliation":[]},{"fullname":"M.W. Schwartz","name":"M. W.","surname":"Schwartz","rank":43,"pid":[],"affiliation":[]},{"fullname":"I. Storch","name":"I.","surname":"Storch","rank":44,"pid":[],"affiliation":[]},{"fullname":"C. 
Wordley","name":"C.","surname":"Wordley","rank":45,"pid":[],"affiliation":[]}],"resulttype":{"classid":"publication","classname":"publication","schemeid":"dnet:result_typologies","schemename":"dnet:result_typologies"},"metaResourceType":null,"language":{"classid":"eng","classname":"English","schemeid":"dnet:languages","schemename":"dnet:languages"},"country":[],"subject":[{"value":"Conservation of Natural Resources","qualifier":{"classid":"","classname":"","schemeid":"","schemename":""},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"title":[{"value":"Ensuring tests of conservation interventions build on existing literature","qualifier":{"classid":"main title","classname":"main title","schemeid":"dnet:dataCite_title","schemename":"dnet:dataCite_title"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"relevantdate":[{"value":"2020","qualifier":{"classid":"Accepted","classname":"Accepted","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"2020","qualifier":{"classid":"Issued","classname":"Issued","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","i
nferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"2022-06-20","qualifier":{"classid":"Available","classname":"Available","schemeid":"dnet:dataCite_date","schemename":"dnet:dataCite_date"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"description":[],"dateofacceptance":{"value":"2020-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"publisher":{"value":"Wiley Blackwell 
Publishing","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"embargoenddate":null,"source":[],"fulltext":[{"value":"https://air.unimi.it/bitstream/2434/907506/4/Full%20manuscript%20resubmitted.pdf","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"format":[{"value":"application/pdf","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"contributor":[],"resourcetype":{"classid":"journal article","classname":"journal article","schemeid":"dnet:dataCite_resource","schemename":"dnet:dataCite_resource"},"coverage":[],"bestaccessright":{"classid":"OPEN","classname":"Open Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes"},"context":[],"externalReference":[],"instance":[{"license":null,"accessright":{"classid":"OPEN","classname":"Open 
Access","schemeid":"dnet:access_modes","schemename":"dnet:access_modes","openAccessRoute":null},"instancetype":{"classid":"0001","classname":"Article","schemeid":"dnet:publication_resource","schemename":"dnet:publication_resource"},"instanceTypeMapping":[{"originalType":"http://purl.org/coar/resource_type/c_6501","typeCode":null,"typeLabel":null,"vocabularyName":"openaire::coar_resource_types_3_1"}],"hostedby":{"key":"10|opendoar____::17326d10d511828f6b34fa6d751739e2","value":"Archivio Istituzionale della Ricerca dell'Università degli Studi di Milano","dataInfo":null},"url":["https://hdl.handle.net/2434/907506","https://doi.org/10.1111/cobi.13555"],"distributionlocation":null,"collectedfrom":{"key":"10|opendoar____::17326d10d511828f6b34fa6d751739e2","value":"Archivio Istituzionale della Ricerca dell'Università degli Studi di Milano","dataInfo":null},"pid":[{"value":"2434/907506","qualifier":{"classid":"handle","classname":"Handle","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"alternateIdentifier":[{"value":"32779884","qualifier":{"classid":"pmid","classname":"pmid","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"10.1111/cobi.13555","qualifier":{"classid":"doi","classname":"doi","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance
":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofacceptance":{"value":"2020-01-01","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.9","inferenceprovenance":null,"provenanceaction":{"classid":"sysimport:crosswalk:repository","classname":"sysimport:crosswalk:repository","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"processingchargeamount":null,"processingchargecurrency":null,"refereed":{"classid":"","classname":"","schemeid":"dnet:review_levels","schemename":"dnet:review_levels"},"measures":null,"fulltext":"https://air.unimi.it/bitstream/2434/907506/4/Full%20manuscript%20resubmitted.pdf"}],"eoscifguidelines":[],"openAccessColor":null,"publiclyFunded":null,"journal":null,"isGreen":null,"isInDiamondJournal":null} diff --git a/dhp-workflows/dhp-stats-update/pom.xml b/dhp-workflows/dhp-stats-update/pom.xml index f491b5868..cc15b8a15 100644 --- a/dhp-workflows/dhp-stats-update/pom.xml +++ b/dhp-workflows/dhp-stats-update/pom.xml @@ -8,6 +8,11 @@ 4.0.0 dhp-stats-update + + eu.dnetlib.dhp + dhp-common + ${project.version} + org.apache.spark spark-core_${scala.binary.version} diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql index 5aa14e2c2..f13b2500c 100755 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql @@ -1,5 +1,5 @@ -- Sprint 1 ---- -drop table if exists ${stats_db_name}.indi_pub_green_oa purge; +drop 
table if exists ${stats_db_name}.indi_pub_green_oa purge; /*EOS*/ --create table if not exists ${stats_db_name}.indi_pub_green_oa stored as parquet as --select distinct p.id, coalesce(green_oa, 0) as green_oa @@ -24,9 +24,9 @@ from ${stats_db_name}.publication p where datasource.type like '%Repository%' and (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and datasource.name!='Other') tmp - on p.id= tmp.id; + on p.id= tmp.id; /*EOS*/ -drop table if exists ${stats_db_name}.indi_pub_grey_lit purge; +drop table if exists ${stats_db_name}.indi_pub_grey_lit purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_pub_grey_lit stored as parquet as select distinct p.id, coalesce(grey_lit, 0) as grey_lit @@ -37,9 +37,9 @@ from ${stats_db_name}.publication p join ${stats_db_name}.result_classifications rt on rt.id = p.id where rt.type not in ('Article','Part of book or chapter of book','Book','Doctoral thesis','Master thesis','Data Paper', 'Thesis', 'Bachelor thesis', 'Conference object') and not exists (select 1 from ${stats_db_name}.result_classifications rc where type ='Other literature type' - and rc.id=p.id)) tmp on p.id=tmp.id; + and rc.id=p.id)) tmp on p.id=tmp.id; /*EOS*/ -drop table if exists ${stats_db_name}.indi_pub_doi_from_crossref purge; +drop table if exists ${stats_db_name}.indi_pub_doi_from_crossref purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_pub_doi_from_crossref stored as parquet as select distinct p.id, coalesce(doi_from_crossref, 0) as doi_from_crossref @@ -48,10 +48,10 @@ from ${stats_db_name}.publication p (select ri.id, 1 as doi_from_crossref from ${stats_db_name}.result_instance ri join ${stats_db_name}.datasource d on d.id = ri.collectedfrom where pidtype='Digital Object Identifier' and d.name ='Crossref') tmp - on tmp.id=p.id; + on tmp.id=p.id; /*EOS*/ -- Sprint 2 ---- -drop table if exists ${stats_db_name}.indi_result_has_cc_licence purge; +drop table if exists 
${stats_db_name}.indi_result_has_cc_licence purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_result_has_cc_licence stored as parquet as select distinct r.id, (case when lic='' or lic is null then 0 else 1 end) as has_cc_license @@ -59,9 +59,9 @@ from ${stats_db_name}.result r left outer join (select r.id, license.type as lic from ${stats_db_name}.result r join ${stats_db_name}.result_licenses as license on license.id = r.id where lower(license.type) LIKE '%creativecommons.org%' OR lower(license.type) LIKE '%cc-%') tmp - on r.id= tmp.id; + on r.id= tmp.id; /*EOS*/ -drop table if exists ${stats_db_name}.indi_result_has_cc_licence_url purge; +drop table if exists ${stats_db_name}.indi_result_has_cc_licence_url purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_result_has_cc_licence_url stored as parquet as select distinct r.id, case when lic_host='' or lic_host is null then 0 else 1 end as has_cc_license_url @@ -70,32 +70,32 @@ from ${stats_db_name}.result r from ${stats_db_name}.result r join ${stats_db_name}.result_licenses as license on license.id = r.id WHERE lower(parse_url(license.type, "HOST")) = "creativecommons.org") tmp - on r.id= tmp.id; + on r.id= tmp.id; /*EOS*/ -drop table if exists ${stats_db_name}.indi_pub_has_abstract purge; +drop table if exists ${stats_db_name}.indi_pub_has_abstract purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_pub_has_abstract stored as parquet as select distinct publication.id, cast(coalesce(abstract, true) as int) has_abstract -from ${stats_db_name}.publication; +from ${stats_db_name}.publication; /*EOS*/ -drop table if exists ${stats_db_name}.indi_result_with_orcid purge; +drop table if exists ${stats_db_name}.indi_result_with_orcid purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_result_with_orcid stored as parquet as select distinct r.id, coalesce(has_orcid, 0) as has_orcid from ${stats_db_name}.result r left outer join (select id, 1 as has_orcid from 
${stats_db_name}.result_orcid) tmp - on r.id= tmp.id; + on r.id= tmp.id; /*EOS*/ ---- Sprint 3 ---- -drop table if exists ${stats_db_name}.indi_funded_result_with_fundref purge; +drop table if exists ${stats_db_name}.indi_funded_result_with_fundref purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_funded_result_with_fundref stored as parquet as select distinct r.result as id, coalesce(fundref, 0) as fundref from ${stats_db_name}.project_results r left outer join (select distinct result, 1 as fundref from ${stats_db_name}.project_results where provenance='Harvested') tmp - on r.result= tmp.result; + on r.result= tmp.result; /*EOS*/ -- create table indi_result_org_collab stored as parquet as -- select o1.organization org1, o2.organization org2, count(distinct o1.id) as collaborations @@ -105,65 +105,65 @@ from ${stats_db_name}.project_results r -- -- compute stats indi_result_org_collab; -- -create TEMPORARY TABLE ${stats_db_name}.tmp AS SELECT ro.organization organization, ro.id, o.name from ${stats_db_name}.result_organization ro -join ${stats_db_name}.organization o on o.id=ro.organization where o.name is not null; +create TEMPORARY VIEW tmp AS SELECT ro.organization organization, ro.id, o.name from ${stats_db_name}.result_organization ro +join ${stats_db_name}.organization o on o.id=ro.organization where o.name is not null; /*EOS*/ -drop table if exists ${stats_db_name}.indi_result_org_collab purge; +drop table if exists ${stats_db_name}.indi_result_org_collab purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_result_org_collab stored as parquet as select o1.organization org1, o1.name org1name1, o2.organization org2, o2.name org2name2, count(o1.id) as collaborations -from ${stats_db_name}.tmp as o1 -join ${stats_db_name}.tmp as o2 where o1.id=o2.id and o1.organization!=o2.organization and o1.name!=o2.name -group by o1.organization, o2.organization, o1.name, o2.name; +from tmp as o1 +join tmp as o2 where o1.id=o2.id and 
o1.organization!=o2.organization and o1.name!=o2.name +group by o1.organization, o2.organization, o1.name, o2.name; /*EOS*/ -drop table if exists ${stats_db_name}.tmp purge; +DROP VIEW if exists tmp; /*EOS*/ -create TEMPORARY TABLE ${stats_db_name}.tmp AS +create TEMPORARY VIEW tmp AS select distinct ro.organization organization, ro.id, o.name, o.country from ${stats_db_name}.result_organization ro -join ${stats_db_name}.organization o on o.id=ro.organization where country <> 'UNKNOWN' and o.name is not null; +join ${stats_db_name}.organization o on o.id=ro.organization where country <> 'UNKNOWN' and o.name is not null; /*EOS*/ -drop table if exists ${stats_db_name}.indi_result_org_country_collab purge; +drop table if exists ${stats_db_name}.indi_result_org_country_collab purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_result_org_country_collab stored as parquet as select o1.organization org1,o1.name org1name1, o2.country country2, count(o1.id) as collaborations -from ${stats_db_name}.tmp as o1 join ${stats_db_name}.tmp as o2 on o1.id=o2.id +from tmp as o1 join tmp as o2 on o1.id=o2.id where o1.id=o2.id and o1.country!=o2.country -group by o1.organization, o1.id, o1.name, o2.country; +group by o1.organization, o1.id, o1.name, o2.country; /*EOS*/ -drop table if exists ${stats_db_name}.tmp purge; +DROP VIEW if exists tmp; /*EOS*/ -create TEMPORARY TABLE ${stats_db_name}.tmp AS +create TEMPORARY VIEW tmp AS select o.id organization, o.name, ro.project as project from ${stats_db_name}.organization o - join ${stats_db_name}.organization_projects ro on o.id=ro.id where o.name is not null; + join ${stats_db_name}.organization_projects ro on o.id=ro.id where o.name is not null; /*EOS*/ -drop table if exists ${stats_db_name}.indi_project_collab_org purge; +drop table if exists ${stats_db_name}.indi_project_collab_org purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_project_collab_org stored as parquet as select o1.organization 
org1,o1.name orgname1, o2.organization org2, o2.name orgname2, count(distinct o1.project) as collaborations -from ${stats_db_name}.tmp as o1 - join ${stats_db_name}.tmp as o2 on o1.project=o2.project +from tmp as o1 + join tmp as o2 on o1.project=o2.project where o1.organization<>o2.organization and o1.name<>o2.name -group by o1.name,o2.name, o1.organization, o2.organization; +group by o1.name,o2.name, o1.organization, o2.organization; /*EOS*/ -drop table if exists ${stats_db_name}.tmp purge; +DROP VIEW if exists tmp; /*EOS*/ -create TEMPORARY TABLE ${stats_db_name}.tmp AS +create TEMPORARY VIEW tmp AS select o.id organization, o.name, o.country , ro.project as project from ${stats_db_name}.organization o join ${stats_db_name}.organization_projects ro on o.id=ro.id - and o.country <> 'UNKNOWN' and o.name is not null; + and o.country <> 'UNKNOWN' and o.name is not null; /*EOS*/ -drop table if exists ${stats_db_name}.indi_project_collab_org_country purge; +drop table if exists ${stats_db_name}.indi_project_collab_org_country purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_project_collab_org_country stored as parquet as select o1.organization org1,o1.name org1name, o2.country country2, count(distinct o1.project) as collaborations -from ${stats_db_name}.tmp as o1 - join ${stats_db_name}.tmp as o2 on o1.project=o2.project +from tmp as o1 + join tmp as o2 on o1.project=o2.project where o1.organization<>o2.organization and o1.country<>o2.country -group by o1.organization, o2.country, o1.name; +group by o1.organization, o2.country, o1.name; /*EOS*/ -drop table if exists ${stats_db_name}.tmp purge; +DROP VIEW if exists tmp; /*EOS*/ -drop table if exists ${stats_db_name}.indi_funder_country_collab purge; +drop table if exists ${stats_db_name}.indi_funder_country_collab purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_funder_country_collab stored as parquet as with tmp as (select funder, project, country from 
${stats_db_name}.organization_projects op @@ -174,26 +174,26 @@ select f1.funder, f1.country as country1, f2.country as country2, count(distinct from tmp as f1 join tmp as f2 on f1.project=f2.project where f1.country<>f2.country -group by f1.funder, f2.country, f1.country; +group by f1.funder, f2.country, f1.country; /*EOS*/ -create TEMPORARY TABLE ${stats_db_name}.tmp AS +create TEMPORARY VIEW tmp AS select distinct country, ro.id as result from ${stats_db_name}.organization o join ${stats_db_name}.result_organization ro on o.id=ro.organization - where country <> 'UNKNOWN' and o.name is not null; + where country <> 'UNKNOWN' and o.name is not null; /*EOS*/ -drop table if exists ${stats_db_name}.indi_result_country_collab purge; +drop table if exists ${stats_db_name}.indi_result_country_collab purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_result_country_collab stored as parquet as select o1.country country1, o2.country country2, count(o1.result) as collaborations -from ${stats_db_name}.tmp as o1 - join ${stats_db_name}.tmp as o2 on o1.result=o2.result +from tmp as o1 + join tmp as o2 on o1.result=o2.result where o1.country<>o2.country -group by o1.country, o2.country; +group by o1.country, o2.country; /*EOS*/ -drop table if exists ${stats_db_name}.tmp purge; +DROP VIEW if exists tmp; /*EOS*/ ---- Sprint 4 ---- -drop table if exists ${stats_db_name}.indi_pub_diamond purge; +drop table if exists ${stats_db_name}.indi_pub_diamond purge; /*EOS*/ --create table if not exists ${stats_db_name}.indi_pub_diamond stored as parquet as --select distinct pd.id, coalesce(in_diamond_journal, 0) as in_diamond_journal @@ -212,9 +212,9 @@ left outer join (select pd.id, 1 as in_diamond_journal from ${stats_db_name}.pub join ${stats_db_name}.datasource d on d.id=pd.datasource join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) and (ps.journal_is_in_doaj=true or ps.journal_is_oa=true) and ps.has_apc=false) tmp -on 
pd.id=tmp.id; +on pd.id=tmp.id; /*EOS*/ -drop table if exists ${stats_db_name}.indi_pub_in_transformative purge; +drop table if exists ${stats_db_name}.indi_pub_in_transformative purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_pub_in_transformative stored as parquet as select distinct pd.id, coalesce(is_transformative, 0) as is_transformative @@ -224,9 +224,9 @@ from ${stats_db_name}.publication pd join ${stats_db_name}.datasource d on d.id=pd.datasource join STATS_EXT.plan_s_jn ps where (ps.issn_print=d.issn_printed and ps.issn_online=d.issn_online) and ps.is_transformative_journal=true) tmp - on pd.id=tmp.id; + on pd.id=tmp.id; /*EOS*/ -drop table if exists ${stats_db_name}.indi_pub_closed_other_open purge; +drop table if exists ${stats_db_name}.indi_pub_closed_other_open purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_pub_closed_other_open stored as parquet as select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_open from ${stats_db_name}.result_instance ri @@ -236,53 +236,53 @@ select distinct ri.id, coalesce(pub_closed_other_open, 0) as pub_closed_other_op join ${stats_db_name}.datasource d on ri.hostedby=d.id where d.type like '%Journal%' and ri.accessright='Closed Access' and (p.bestlicence='Open Access' or p.bestlicence='Open Source')) tmp - on tmp.id=ri.id; + on tmp.id=ri.id; /*EOS*/ ---- Sprint 5 ---- -drop table if exists ${stats_db_name}.indi_result_no_of_copies purge; +drop table if exists ${stats_db_name}.indi_result_no_of_copies purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_result_no_of_copies stored as parquet as -select id, count(id) as number_of_copies from ${stats_db_name}.result_instance group by id; +select id, count(id) as number_of_copies from ${stats_db_name}.result_instance group by id; /*EOS*/ ---- Sprint 6 ---- -drop table if exists ${stats_db_name}.indi_pub_downloads purge; +drop table if exists ${stats_db_name}.indi_pub_downloads purge; /*EOS*/ create table if 
not exists ${stats_db_name}.indi_pub_downloads stored as parquet as SELECT result_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats join ${stats_db_name}.publication on result_id=id where downloads>0 GROUP BY result_id -order by no_downloads desc; +order by no_downloads desc; /*EOS*/ --ANALYZE TABLE ${stats_db_name}.indi_pub_downloads COMPUTE STATISTICS; -drop table if exists ${stats_db_name}.indi_pub_downloads_datasource purge; +drop table if exists ${stats_db_name}.indi_pub_downloads_datasource purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_pub_downloads_datasource stored as parquet as SELECT result_id, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats join ${stats_db_name}.publication on result_id=id where downloads>0 GROUP BY result_id, repository_id -order by result_id; +order by result_id; /*EOS*/ -drop table if exists ${stats_db_name}.indi_pub_downloads_year purge; +drop table if exists ${stats_db_name}.indi_pub_downloads_year purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_pub_downloads_year stored as parquet as SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us join ${stats_db_name}.publication on result_id=id where downloads>0 -GROUP BY result_id, substring(us.`date`, 1,4); +GROUP BY result_id, substring(us.`date`, 1,4); /*EOS*/ -drop table if exists ${stats_db_name}.indi_pub_downloads_datasource_year purge; +drop table if exists ${stats_db_name}.indi_pub_downloads_datasource_year purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_pub_downloads_datasource_year stored as parquet as SELECT result_id, cast(substring(us.`date`, 1,4) as int) as `year`, repository_id, sum(downloads) no_downloads from openaire_prod_usage_stats.usage_stats us join ${stats_db_name}.publication on result_id=id where downloads>0 -GROUP BY result_id, repository_id, substring(us.`date`, 
1,4); +GROUP BY result_id, repository_id, substring(us.`date`, 1,4); /*EOS*/ ---- Sprint 7 ---- -drop table if exists ${stats_db_name}.indi_pub_gold_oa purge; +drop table if exists ${stats_db_name}.indi_pub_gold_oa purge; /*EOS*/ --create table if not exists ${stats_db_name}.indi_pub_gold_oa stored as parquet as -- WITH gold_oa AS ( SELECT @@ -381,9 +381,9 @@ left outer join ( select pd.id, 1 as is_gold FROM ${stats_db_name}.publication_datasources pd join dd on dd.id=pd.datasource - left outer join ${stats_db_name}.result_accessroute ra on ra.id = pd.id where ra.accessroute = 'gold') tmp on tmp.id=pd.id; + left outer join ${stats_db_name}.result_accessroute ra on ra.id = pd.id where ra.accessroute = 'gold') tmp on tmp.id=pd.id; /*EOS*/ -drop table if exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc purge; +drop table if exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_pub_hybrid_oa_with_cc stored as parquet as WITH hybrid_oa AS ( @@ -414,9 +414,9 @@ FROM ${stats_db_name}.publication_datasources pd JOIN hybrid_oa ON issn.issn = hybrid_oa.issn JOIN ${stats_db_name}.indi_result_has_cc_licence cc on pd.id=cc.id JOIN ${stats_db_name}.indi_pub_gold_oa ga on pd.id=ga.id - where cc.has_cc_license=1 and ga.is_gold=0) tmp on pd.id=tmp.id; + where cc.has_cc_license=1 and ga.is_gold=0) tmp on pd.id=tmp.id; /*EOS*/ -drop table if exists ${stats_db_name}.indi_pub_hybrid purge; +drop table if exists ${stats_db_name}.indi_pub_hybrid purge; /*EOS*/ --create table if not exists ${stats_db_name}.indi_pub_hybrid stored as parquet as -- WITH gold_oa AS ( SELECT @@ -489,9 +489,9 @@ join ${stats_db_name}.result_accessroute ra on ra.id=pd.id join ${stats_db_name}.datasource d on d.id=ri.hostedby where indi_gold.is_gold=0 and ((d.type like '%Journal%' and ri.accessright!='Closed Access' and ri.accessright!='Restricted' and ri.license is not null) or ra.accessroute='hybrid'))tmp -on pd.id=tmp.id; +on pd.id=tmp.id; /*EOS*/ 
-drop table if exists ${stats_db_name}.indi_org_fairness purge; +drop table if exists ${stats_db_name}.indi_org_fairness purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_org_fairness stored as parquet as --return results with PIDs, and rich metadata group by organization @@ -509,9 +509,9 @@ create table if not exists ${stats_db_name}.indi_org_fairness stored as parquet --return results_fair/all_results select allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness from allresults - join result_fair on result_fair.organization=allresults.organization; + join result_fair on result_fair.organization=allresults.organization; /*EOS*/ -CREATE TEMPORARY table ${stats_db_name}.result_fair as +CREATE TEMPORARY VIEW result_fair as select ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro join ${stats_db_name}.publication p on p.id=ro.id @@ -519,296 +519,296 @@ select ro.organization organization, count(distinct ro.id) no_result_fair join ${stats_db_name}.indi_pub_grey_lit gl on gl.id=p.id where (title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and (authors>0) and cast(year as int)>2003 and dc.doi_from_crossref=1 and gl.grey_lit=0 - group by ro.organization; + group by ro.organization; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allresults as +CREATE TEMPORARY VIEW allresults as select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro join ${stats_db_name}.publication p on p.id=ro.id where cast(year as int)>2003 - group by ro.organization; + group by ro.organization; /*EOS*/ -drop table if exists ${stats_db_name}.indi_org_fairness_pub_pr purge; +drop table if exists ${stats_db_name}.indi_org_fairness_pub_pr purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_org_fairness_pub_pr stored as parquet as select ar.organization, rf.no_result_fair/ar.no_allresults 
org_fairness -from ${stats_db_name}.allresults ar - join ${stats_db_name}.result_fair rf on rf.organization=ar.organization; +from allresults ar + join result_fair rf on rf.organization=ar.organization; /*EOS*/ -DROP table ${stats_db_name}.result_fair purge; -DROP table ${stats_db_name}.allresults purge; +DROP VIEW result_fair; /*EOS*/ +DROP VIEW allresults; /*EOS*/ -CREATE TEMPORARY table ${stats_db_name}.result_fair as +CREATE TEMPORARY VIEW result_fair as select year, ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro join ${stats_db_name}.result p on p.id=ro.id where (title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and (authors>0) and cast(year as int)>2003 - group by ro.organization, year; + group by ro.organization, year; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allresults as select year, ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro +CREATE TEMPORARY VIEW allresults as select year, ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro join ${stats_db_name}.result p on p.id=ro.id where cast(year as int)>2003 - group by ro.organization, year; + group by ro.organization, year; /*EOS*/ -drop table if exists ${stats_db_name}.indi_org_fairness_pub_year purge; +drop table if exists ${stats_db_name}.indi_org_fairness_pub_year purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_org_fairness_pub_year stored as parquet as select cast(allresults.year as int) year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness -from ${stats_db_name}.allresults - join ${stats_db_name}.result_fair on result_fair.organization=allresults.organization and result_fair.year=allresults.year; +from allresults + join result_fair on result_fair.organization=allresults.organization and result_fair.year=allresults.year; /*EOS*/ -DROP table 
${stats_db_name}.result_fair purge; -DROP table ${stats_db_name}.allresults purge; +DROP VIEW result_fair; /*EOS*/ +DROP VIEW allresults; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.result_fair as +CREATE TEMPORARY VIEW result_fair as select ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro join ${stats_db_name}.result p on p.id=ro.id where (title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and (authors>0) and cast(year as int)>2003 - group by ro.organization; + group by ro.organization; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allresults as +CREATE TEMPORARY VIEW allresults as select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro join ${stats_db_name}.result p on p.id=ro.id where cast(year as int)>2003 - group by ro.organization; + group by ro.organization; /*EOS*/ -drop table if exists ${stats_db_name}.indi_org_fairness_pub purge; +drop table if exists ${stats_db_name}.indi_org_fairness_pub purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_org_fairness_pub as select ar.organization, rf.no_result_fair/ar.no_allresults org_fairness -from ${stats_db_name}.allresults ar join ${stats_db_name}.result_fair rf -on rf.organization=ar.organization; +from allresults ar join result_fair rf +on rf.organization=ar.organization; /*EOS*/ -DROP table ${stats_db_name}.result_fair purge; -DROP table ${stats_db_name}.allresults purge; +DROP VIEW result_fair; /*EOS*/ +DROP VIEW allresults; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.result_fair as +CREATE TEMPORARY VIEW result_fair as select year, ro.organization organization, count(distinct ro.id) no_result_fair from ${stats_db_name}.result_organization ro join ${stats_db_name}.result r on r.id=ro.id join ${stats_db_name}.result_pids rp on r.id=rp.id where (title is not null) and (publisher is not null) and (abstract=true) and (year is not null) and 
(authors>0) and cast(year as int)>2003 - group by ro.organization, year; + group by ro.organization, year; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allresults as +CREATE TEMPORARY VIEW allresults as select year, ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro join ${stats_db_name}.result r on r.id=ro.id where cast(year as int)>2003 - group by ro.organization, year; + group by ro.organization, year; /*EOS*/ -drop table if exists ${stats_db_name}.indi_org_fairness_year purge; +drop table if exists ${stats_db_name}.indi_org_fairness_year purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_org_fairness_year stored as parquet as select cast(allresults.year as int) year, allresults.organization, result_fair.no_result_fair/allresults.no_allresults org_fairness - from ${stats_db_name}.allresults - join ${stats_db_name}.result_fair on result_fair.organization=allresults.organization and cast(result_fair.year as int)=cast(allresults.year as int); + from allresults + join result_fair on result_fair.organization=allresults.organization and cast(result_fair.year as int)=cast(allresults.year as int); /*EOS*/ -DROP table ${stats_db_name}.result_fair purge; -DROP table ${stats_db_name}.allresults purge; +DROP VIEW result_fair; /*EOS*/ +DROP VIEW allresults; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.result_with_pid as +CREATE TEMPORARY VIEW result_with_pid as select year, ro.organization, count(distinct rp.id) no_result_with_pid from ${stats_db_name}.result_organization ro join ${stats_db_name}.result_pids rp on rp.id=ro.id join ${stats_db_name}.result r on r.id=rp.id where cast(year as int) >2003 - group by ro.organization, year; + group by ro.organization, year; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allresults as +CREATE TEMPORARY VIEW allresults as select year, ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro join ${stats_db_name}.result r on 
r.id=ro.id where cast(year as int) >2003 - group by ro.organization, year; + group by ro.organization, year; /*EOS*/ -drop table if exists ${stats_db_name}.indi_org_findable_year purge; +drop table if exists ${stats_db_name}.indi_org_findable_year purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_org_findable_year stored as parquet as select cast(allresults.year as int) year, allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable -from ${stats_db_name}.allresults - join ${stats_db_name}.result_with_pid on result_with_pid.organization=allresults.organization and cast(result_with_pid.year as int)=cast(allresults.year as int); +from allresults + join result_with_pid on result_with_pid.organization=allresults.organization and cast(result_with_pid.year as int)=cast(allresults.year as int); /*EOS*/ -DROP table ${stats_db_name}.result_with_pid purge; -DROP table ${stats_db_name}.allresults purge; +DROP VIEW result_with_pid; /*EOS*/ +DROP VIEW allresults; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.result_with_pid as +CREATE TEMPORARY VIEW result_with_pid as select ro.organization, count(distinct rp.id) no_result_with_pid from ${stats_db_name}.result_organization ro join ${stats_db_name}.result_pids rp on rp.id=ro.id join ${stats_db_name}.result r on r.id=rp.id where cast(year as int) >2003 - group by ro.organization; + group by ro.organization; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allresults as +CREATE TEMPORARY VIEW allresults as select ro.organization, count(distinct ro.id) no_allresults from ${stats_db_name}.result_organization ro join ${stats_db_name}.result r on r.id=ro.id where cast(year as int) >2003 - group by ro.organization; + group by ro.organization; /*EOS*/ -drop table if exists ${stats_db_name}.indi_org_findable purge; +drop table if exists ${stats_db_name}.indi_org_findable purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_org_findable stored as parquet as select 
allresults.organization, result_with_pid.no_result_with_pid/allresults.no_allresults org_findable -from ${stats_db_name}.allresults - join ${stats_db_name}.result_with_pid on result_with_pid.organization=allresults.organization; +from allresults + join result_with_pid on result_with_pid.organization=allresults.organization; /*EOS*/ -DROP table ${stats_db_name}.result_with_pid purge; -DROP table ${stats_db_name}.allresults purge; +DROP VIEW result_with_pid; /*EOS*/ +DROP VIEW allresults; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.pubs_oa as +CREATE TEMPORARY VIEW pubs_oa as SELECT ro.organization, count(distinct r.id) no_oapubs FROM ${stats_db_name}.publication r join ${stats_db_name}.result_organization ro on ro.id=r.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 - group by ro.organization; + group by ro.organization; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.datasets_oa as +CREATE TEMPORARY VIEW datasets_oa as SELECT ro.organization, count(distinct r.id) no_oadatasets FROM ${stats_db_name}.dataset r join ${stats_db_name}.result_organization ro on ro.id=r.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 - group by ro.organization; + group by ro.organization; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.software_oa as +CREATE TEMPORARY VIEW software_oa as SELECT ro.organization, count(distinct r.id) no_oasoftware FROM ${stats_db_name}.software r join ${stats_db_name}.result_organization ro on ro.id=r.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 - group by ro.organization; + group by ro.organization; /*EOS*/ -CREATE TEMPORARY 
TABLE ${stats_db_name}.allpubs as +CREATE TEMPORARY VIEW allpubs as SELECT ro.organization, count(ro.id) no_allpubs FROM ${stats_db_name}.result_organization ro join ${stats_db_name}.publication ps on ps.id=ro.id where cast(ps.year as int)>2003 - group by ro.organization; + group by ro.organization; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.alldatasets as +CREATE TEMPORARY VIEW alldatasets as SELECT ro.organization, count(ro.id) no_alldatasets FROM ${stats_db_name}.result_organization ro join ${stats_db_name}.dataset ps on ps.id=ro.id where cast(ps.year as int)>2003 - group by ro.organization; + group by ro.organization; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allsoftware as +CREATE TEMPORARY VIEW allsoftware as SELECT ro.organization, count(ro.id) no_allsoftware FROM ${stats_db_name}.result_organization ro join ${stats_db_name}.software ps on ps.id=ro.id where cast(ps.year as int)>2003 - group by ro.organization; + group by ro.organization; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allpubsshare as -select pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from ${stats_db_name}.allpubs - join ${stats_db_name}.pubs_oa on allpubs.organization=pubs_oa.organization; +CREATE TEMPORARY VIEW allpubsshare as +select pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from allpubs + join pubs_oa on allpubs.organization=pubs_oa.organization; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.alldatasetssshare as +CREATE TEMPORARY VIEW alldatasetssshare as select datasets_oa.organization, datasets_oa.no_oadatasets/alldatasets.no_alldatasets d - from ${stats_db_name}.alldatasets - join ${stats_db_name}.datasets_oa on alldatasets.organization=datasets_oa.organization; + from alldatasets + join datasets_oa on alldatasets.organization=datasets_oa.organization; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allsoftwaresshare as +CREATE TEMPORARY VIEW allsoftwaresshare as select software_oa.organization, 
software_oa.no_oasoftware/allsoftware.no_allsoftware s - from ${stats_db_name}.allsoftware - join ${stats_db_name}.software_oa on allsoftware.organization=software_oa.organization; + from allsoftware + join software_oa on allsoftware.organization=software_oa.organization; /*EOS*/ -drop table if exists ${stats_db_name}.indi_org_openess purge; +drop table if exists ${stats_db_name}.indi_org_openess purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_org_openess stored as parquet as select allpubsshare.organization, (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end) +(case when d is null then 0 else 1 end)) - org_openess FROM ${stats_db_name}.allpubsshare + org_openess FROM allpubsshare left outer join (select organization,d from - ${stats_db_name}.alldatasetssshare) tmp1 + alldatasetssshare) tmp1 on tmp1.organization=allpubsshare.organization left outer join (select organization,s from - ${stats_db_name}.allsoftwaresshare) tmp2 - on tmp2.organization=allpubsshare.organization; + allsoftwaresshare) tmp2 + on tmp2.organization=allpubsshare.organization; /*EOS*/ -DROP TABLE ${stats_db_name}.pubs_oa purge; -DROP TABLE ${stats_db_name}.datasets_oa purge; -DROP TABLE ${stats_db_name}.software_oa purge; -DROP TABLE ${stats_db_name}.allpubs purge; -DROP TABLE ${stats_db_name}.alldatasets purge; -DROP TABLE ${stats_db_name}.allsoftware purge; -DROP TABLE ${stats_db_name}.allpubsshare purge; -DROP TABLE ${stats_db_name}.alldatasetssshare purge; -DROP TABLE ${stats_db_name}.allsoftwaresshare purge; +DROP VIEW pubs_oa; /*EOS*/ +DROP VIEW datasets_oa; /*EOS*/ +DROP VIEW software_oa; /*EOS*/ +DROP VIEW allpubs; /*EOS*/ +DROP VIEW alldatasets; /*EOS*/ +DROP VIEW allsoftware; /*EOS*/ +DROP VIEW allpubsshare; /*EOS*/ +DROP VIEW alldatasetssshare; /*EOS*/ +DROP VIEW allsoftwaresshare; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.pubs_oa AS +CREATE TEMPORARY VIEW pubs_oa AS SELECT r.year, ro.organization, count(distinct r.id) no_oapubs 
FROM ${stats_db_name}.publication r join ${stats_db_name}.result_organization ro on ro.id=r.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 - group by ro.organization,r.year; + group by ro.organization,r.year; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.datasets_oa AS +CREATE TEMPORARY VIEW datasets_oa AS SELECT r.year,ro.organization, count(distinct r.id) no_oadatasets FROM ${stats_db_name}.dataset r join ${stats_db_name}.result_organization ro on ro.id=r.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 - group by ro.organization, r.year; + group by ro.organization, r.year; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.software_oa AS +CREATE TEMPORARY VIEW software_oa AS SELECT r.year,ro.organization, count(distinct r.id) no_oasoftware FROM ${stats_db_name}.software r join ${stats_db_name}.result_organization ro on ro.id=r.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 - group by ro.organization, r.year; + group by ro.organization, r.year; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allpubs as +CREATE TEMPORARY VIEW allpubs as SELECT p.year,ro.organization organization, count(ro.id) no_allpubs FROM ${stats_db_name}.result_organization ro join ${stats_db_name}.publication p on p.id=ro.id where cast(p.year as int)>2003 - group by ro.organization, p.year; + group by ro.organization, p.year; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.alldatasets as +CREATE TEMPORARY VIEW alldatasets as SELECT d.year, ro.organization organization, count(ro.id) no_alldatasets FROM ${stats_db_name}.result_organization ro join ${stats_db_name}.dataset 
d on d.id=ro.id where cast(d.year as int)>2003 - group by ro.organization, d.year; + group by ro.organization, d.year; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allsoftware as +CREATE TEMPORARY VIEW allsoftware as SELECT s.year,ro.organization organization, count(ro.id) no_allsoftware FROM ${stats_db_name}.result_organization ro join ${stats_db_name}.software s on s.id=ro.id where cast(s.year as int)>2003 - group by ro.organization, s.year; + group by ro.organization, s.year; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allpubsshare as -select allpubs.year, pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from ${stats_db_name}.allpubs - join ${stats_db_name}.pubs_oa on allpubs.organization=pubs_oa.organization where cast(allpubs.year as INT)=cast(pubs_oa.year as int); +CREATE TEMPORARY VIEW allpubsshare as +select allpubs.year, pubs_oa.organization, pubs_oa.no_oapubs/allpubs.no_allpubs p from allpubs + join pubs_oa on allpubs.organization=pubs_oa.organization where cast(allpubs.year as INT)=cast(pubs_oa.year as int); /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.alldatasetssshare as +CREATE TEMPORARY VIEW alldatasetssshare as select alldatasets.year, datasets_oa.organization, datasets_oa.no_oadatasets/alldatasets.no_alldatasets d - from ${stats_db_name}.alldatasets - join ${stats_db_name}.datasets_oa on alldatasets.organization=datasets_oa.organization where cast(alldatasets.year as INT)=cast(datasets_oa.year as int); + from alldatasets + join datasets_oa on alldatasets.organization=datasets_oa.organization where cast(alldatasets.year as INT)=cast(datasets_oa.year as int); /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allsoftwaresshare as +CREATE TEMPORARY VIEW allsoftwaresshare as select allsoftware.year, software_oa.organization, software_oa.no_oasoftware/allsoftware.no_allsoftware s - from ${stats_db_name}.allsoftware - join ${stats_db_name}.software_oa on allsoftware.organization=software_oa.organization where cast(allsoftware.year 
as INT)=cast(software_oa.year as int); + from allsoftware + join software_oa on allsoftware.organization=software_oa.organization where cast(allsoftware.year as INT)=cast(software_oa.year as int); /*EOS*/ -drop table if exists ${stats_db_name}.indi_org_openess_year purge; +drop table if exists ${stats_db_name}.indi_org_openess_year purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_org_openess_year stored as parquet as select cast(allpubsshare.year as int) year, allpubsshare.organization, (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end) +(case when d is null then 0 else 1 end)) - org_openess FROM ${stats_db_name}.allpubsshare + org_openess FROM allpubsshare left outer join (select cast(year as int), organization,d from - ${stats_db_name}.alldatasetssshare) tmp1 + alldatasetssshare) tmp1 on tmp1.organization=allpubsshare.organization and tmp1.year=allpubsshare.year left outer join (select cast(year as int), organization,s from - ${stats_db_name}.allsoftwaresshare) tmp2 - on tmp2.organization=allpubsshare.organization and cast(tmp2.year as int)=cast(allpubsshare.year as int); + allsoftwaresshare) tmp2 + on tmp2.organization=allpubsshare.organization and cast(tmp2.year as int)=cast(allpubsshare.year as int); /*EOS*/ -DROP TABLE ${stats_db_name}.pubs_oa purge; -DROP TABLE ${stats_db_name}.datasets_oa purge; -DROP TABLE ${stats_db_name}.software_oa purge; -DROP TABLE ${stats_db_name}.allpubs purge; -DROP TABLE ${stats_db_name}.alldatasets purge; -DROP TABLE ${stats_db_name}.allsoftware purge; -DROP TABLE ${stats_db_name}.allpubsshare purge; -DROP TABLE ${stats_db_name}.alldatasetssshare purge; -DROP TABLE ${stats_db_name}.allsoftwaresshare purge; +DROP VIEW pubs_oa; /*EOS*/ +DROP VIEW datasets_oa; /*EOS*/ +DROP VIEW software_oa; /*EOS*/ +DROP VIEW allpubs; /*EOS*/ +DROP VIEW alldatasets; /*EOS*/ +DROP VIEW allsoftware; /*EOS*/ +DROP VIEW allpubsshare; /*EOS*/ +DROP VIEW alldatasetssshare; /*EOS*/ +DROP VIEW 
allsoftwaresshare; /*EOS*/ -drop table if exists ${stats_db_name}.indi_pub_has_preprint purge; +drop table if exists ${stats_db_name}.indi_pub_has_preprint purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_pub_has_preprint stored as parquet as select distinct p.id, coalesce(has_preprint, 0) as has_preprint @@ -817,8 +817,8 @@ from ${stats_db_name}.publication_classifications p select p.id, 1 as has_preprint from ${stats_db_name}.publication_classifications p where p.type='Preprint') tmp - on p.id= tmp.id; -drop table if exists ${stats_db_name}.indi_pub_in_subscribed purge; + on p.id= tmp.id; /*EOS*/ +drop table if exists ${stats_db_name}.indi_pub_in_subscribed purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_pub_in_subscribed stored as parquet as select distinct p.id, coalesce(is_subscription, 0) as is_subscription @@ -829,9 +829,9 @@ from ${stats_db_name}.publication p join ${stats_db_name}.indi_pub_hybrid h on p.id=h.id join ${stats_db_name}.indi_pub_in_transformative t on p.id=t.id where g.is_gold=0 and h.is_hybrid=0 and t.is_transformative=0) tmp - on p.id=tmp.id; + on p.id=tmp.id; /*EOS*/ -drop table if exists ${stats_db_name}.indi_result_with_pid purge; +drop table if exists ${stats_db_name}.indi_result_with_pid purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_result_with_pid as select distinct p.id, coalesce(result_with_pid, 0) as result_with_pid @@ -839,25 +839,25 @@ from ${stats_db_name}.result p left outer join ( select p.id, 1 as result_with_pid from ${stats_db_name}.result_pids p) tmp - on p.id= tmp.id; + on p.id= tmp.id; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.pub_fos_totals as +CREATE TEMPORARY VIEW pub_fos_totals as select rf.id, count(distinct lvl3) totals from ${stats_db_name}.result_fos rf -group by rf.id; +group by rf.id; /*EOS*/ -drop table if exists ${stats_db_name}.indi_pub_interdisciplinarity purge; +drop table if exists ${stats_db_name}.indi_pub_interdisciplinarity purge; /*EOS*/ create 
table if not exists ${stats_db_name}.indi_pub_interdisciplinarity as select distinct p.id as id, coalesce(is_interdisciplinary, 0) as is_interdisciplinary -from ${stats_db_name}.pub_fos_totals p +from pub_fos_totals p left outer join ( -select pub_fos_totals.id, 1 as is_interdisciplinary from ${stats_db_name}.pub_fos_totals -where totals>1) tmp on p.id=tmp.id; +select pub_fos_totals.id, 1 as is_interdisciplinary from pub_fos_totals +where totals>1) tmp on p.id=tmp.id; /*EOS*/ -drop table ${stats_db_name}.pub_fos_totals purge; +drop view pub_fos_totals; /*EOS*/ -drop table if exists ${stats_db_name}.indi_pub_bronze_oa purge; +drop table if exists ${stats_db_name}.indi_pub_bronze_oa purge; /*EOS*/ --create table if not exists ${stats_db_name}.indi_pub_bronze_oa stored as parquet as --select distinct p.id, coalesce(is_bronze_oa,0) as is_bronze_oa @@ -883,38 +883,38 @@ join ${stats_db_name}.datasource d on d.id=ri.hostedby where indi_gold.is_gold=0 and indi_hybrid.is_hybrid=0 and ((d.type like '%Journal%' and ri.accessright!='Closed Access' and ri.accessright!='Restricted' and ri.license is null) or ra.accessroute='bronze')) tmp -on pd.id=tmp.id; +on pd.id=tmp.id; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.project_year_result_year as +CREATE TEMPORARY VIEW project_year_result_year as select p.id project_id, acronym, r.id result_id, r.year, p.end_year from ${stats_db_name}.project p join ${stats_db_name}.result_projects rp on p.id=rp.project join ${stats_db_name}.result r on r.id=rp.id -where p.end_year is NOT NULL and r.year is not null; +where p.end_year is NOT NULL and r.year is not null; /*EOS*/ -drop table if exists ${stats_db_name}.indi_is_project_result_after purge; +drop table if exists ${stats_db_name}.indi_is_project_result_after purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_is_project_result_after stored as parquet as select pry.project_id, pry.acronym, pry.result_id, coalesce(is_project_result_after, 0) as is_project_result_after 
-from ${stats_db_name}.project_year_result_year pry +from project_year_result_year pry left outer join (select pry.project_id, pry.acronym, pry.result_id, 1 as is_project_result_after -from ${stats_db_name}.project_year_result_year pry -where pry.year>pry.end_year) tmp on pry.result_id=tmp.result_id; +from project_year_result_year pry +where pry.year>pry.end_year) tmp on pry.result_id=tmp.result_id; /*EOS*/ -drop table ${stats_db_name}.project_year_result_year purge; +drop view project_year_result_year; /*EOS*/ -drop table ${stats_db_name}.indi_is_funder_plan_s purge; +drop table if exists ${stats_db_name}.indi_is_funder_plan_s purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_is_funder_plan_s stored as parquet as select distinct f.id, f.name, coalesce(is_funder_plan_s, 0) as is_funder_plan_s from ${stats_db_name}.funder f left outer join (select id, name, 1 as is_funder_plan_s from ${stats_db_name}.funder join stats_ext.plan_s_short on c_o_alition_s_organisation_funder=name) tmp - on f.name= tmp.name; + on f.name= tmp.name; /*EOS*/ --Funder Fairness -drop table ${stats_db_name}.indi_funder_fairness purge; +drop table if exists ${stats_db_name}.indi_funder_fairness purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_funder_fairness stored as parquet as with result_fair as @@ -930,10 +930,10 @@ create table if not exists ${stats_db_name}.indi_funder_fairness stored as parqu group by p.funder) select allresults.funder, result_fair.no_result_fair/allresults.no_allresults funder_fairness from allresults - join result_fair on result_fair.funder=allresults.funder; + join result_fair on result_fair.funder=allresults.funder; /*EOS*/ --RIs Fairness -drop table ${stats_db_name}.indi_ris_fairness purge; +drop table if exists ${stats_db_name}.indi_ris_fairness purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_ris_fairness stored as parquet as with result_contexts as @@ -953,188 +953,188 @@ allresults as group by rc.ri_initiative) select 
allresults.ri_initiative, result_fair.no_result_fair/allresults.no_allresults ris_fairness from allresults - join result_fair on result_fair.ri_initiative=allresults.ri_initiative; + join result_fair on result_fair.ri_initiative=allresults.ri_initiative; /*EOS*/ --Funder Openess -CREATE TEMPORARY TABLE ${stats_db_name}.pubs_oa as +CREATE TEMPORARY VIEW pubs_oa as select p.funder funder, count(distinct rp.id) no_oapubs from ${stats_db_name}.result_projects rp join ${stats_db_name}.project p on p.id=rp.project join ${stats_db_name}.publication r on r.id=rp.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 -group by p.funder; +group by p.funder; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.datasets_oa as +CREATE TEMPORARY VIEW datasets_oa as select p.funder funder, count(distinct rp.id) no_oadatasets from ${stats_db_name}.result_projects rp join ${stats_db_name}.project p on p.id=rp.project join ${stats_db_name}.dataset r on r.id=rp.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 -group by p.funder; +group by p.funder; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.software_oa as +CREATE TEMPORARY VIEW software_oa as select p.funder funder, count(distinct rp.id) no_oasoftware from ${stats_db_name}.result_projects rp join ${stats_db_name}.project p on p.id=rp.project join ${stats_db_name}.software r on r.id=rp.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 -group by p.funder; +group by p.funder; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allpubs as +CREATE TEMPORARY VIEW allpubs as select p.funder funder, count(distinct rp.id) no_allpubs 
from ${stats_db_name}.result_projects rp join ${stats_db_name}.project p on p.id=rp.project join ${stats_db_name}.publication r on r.id=rp.id where cast(r.year as int)>2003 -group by p.funder; +group by p.funder; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.alldatasets as +CREATE TEMPORARY VIEW alldatasets as select p.funder funder, count(distinct rp.id) no_alldatasets from ${stats_db_name}.result_projects rp join ${stats_db_name}.project p on p.id=rp.project join ${stats_db_name}.dataset r on r.id=rp.id where cast(r.year as int)>2003 -group by p.funder; +group by p.funder; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allsoftware as +CREATE TEMPORARY VIEW allsoftware as select p.funder funder, count(distinct rp.id) no_allsoftware from ${stats_db_name}.result_projects rp join ${stats_db_name}.project p on p.id=rp.project join ${stats_db_name}.software r on r.id=rp.id where cast(r.year as int)>2003 -group by p.funder; +group by p.funder; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allpubsshare as -select pubs_oa.funder, pubs_oa.no_oapubs/allpubs.no_allpubs p from ${stats_db_name}.allpubs - join ${stats_db_name}.pubs_oa on allpubs.funder=pubs_oa.funder; +CREATE TEMPORARY VIEW allpubsshare as +select pubs_oa.funder, pubs_oa.no_oapubs/allpubs.no_allpubs p from allpubs + join pubs_oa on allpubs.funder=pubs_oa.funder; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.alldatasetssshare as +CREATE TEMPORARY VIEW alldatasetssshare as select datasets_oa.funder, datasets_oa.no_oadatasets/alldatasets.no_alldatasets d - from ${stats_db_name}.alldatasets - join ${stats_db_name}.datasets_oa on alldatasets.funder=datasets_oa.funder; + from alldatasets + join datasets_oa on alldatasets.funder=datasets_oa.funder; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allsoftwaresshare as +CREATE TEMPORARY VIEW allsoftwaresshare as select software_oa.funder, software_oa.no_oasoftware/allsoftware.no_allsoftware s - from ${stats_db_name}.allsoftware - join 
${stats_db_name}.software_oa on allsoftware.funder=software_oa.funder; + from allsoftware + join software_oa on allsoftware.funder=software_oa.funder; /*EOS*/ -drop table ${stats_db_name}.indi_funder_openess purge; +drop table if exists ${stats_db_name}.indi_funder_openess purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_funder_openess stored as parquet as select allpubsshare.funder, (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end) +(case when d is null then 0 else 1 end)) - funder_openess FROM ${stats_db_name}.allpubsshare + funder_openess FROM allpubsshare left outer join (select funder,d from - ${stats_db_name}.alldatasetssshare) tmp1 + alldatasetssshare) tmp1 on tmp1.funder=allpubsshare.funder left outer join (select funder,s from - ${stats_db_name}.allsoftwaresshare) tmp2 - on tmp2.funder=allpubsshare.funder; + allsoftwaresshare) tmp2 + on tmp2.funder=allpubsshare.funder; /*EOS*/ -DROP TABLE ${stats_db_name}.pubs_oa purge; -DROP TABLE ${stats_db_name}.datasets_oa purge; -DROP TABLE ${stats_db_name}.software_oa purge; -DROP TABLE ${stats_db_name}.allpubs purge; -DROP TABLE ${stats_db_name}.alldatasets purge; -DROP TABLE ${stats_db_name}.allsoftware purge; -DROP TABLE ${stats_db_name}.allpubsshare purge; -DROP TABLE ${stats_db_name}.alldatasetssshare purge; -DROP TABLE ${stats_db_name}.allsoftwaresshare purge; +DROP VIEW pubs_oa; /*EOS*/ +DROP VIEW datasets_oa; /*EOS*/ +DROP VIEW software_oa; /*EOS*/ +DROP VIEW allpubs; /*EOS*/ +DROP VIEW alldatasets; /*EOS*/ +DROP VIEW allsoftware; /*EOS*/ +DROP VIEW allpubsshare; /*EOS*/ +DROP VIEW alldatasetssshare; /*EOS*/ +DROP VIEW allsoftwaresshare; /*EOS*/ --RIs Openess -CREATE TEMPORARY TABLE ${stats_db_name}.result_contexts as +CREATE TEMPORARY VIEW result_contexts as select distinct rc.id, context.name ri_initiative from ${stats_db_name}.result_concepts rc join ${stats_db_name}.concept on concept.id=rc.concept join ${stats_db_name}.category on 
category.id=concept.category -join ${stats_db_name}.context on context.id=category.context; +join ${stats_db_name}.context on context.id=category.context; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.pubs_oa as -select rp.ri_initiative ri_initiative, count(distinct rp.id) no_oapubs from ${stats_db_name}.result_contexts rp +CREATE TEMPORARY VIEW pubs_oa as +select rp.ri_initiative ri_initiative, count(distinct rp.id) no_oapubs from result_contexts rp join ${stats_db_name}.publication r on r.id=rp.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 -group by rp.ri_initiative; +group by rp.ri_initiative; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.datasets_oa as -select rp.ri_initiative ri_initiative, count(distinct rp.id) no_oadatasets from ${stats_db_name}.result_contexts rp +CREATE TEMPORARY VIEW datasets_oa as +select rp.ri_initiative ri_initiative, count(distinct rp.id) no_oadatasets from result_contexts rp join ${stats_db_name}.dataset r on r.id=rp.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 -group by rp.ri_initiative; +group by rp.ri_initiative; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.software_oa as -select rp.ri_initiative ri_initiative, count(distinct rp.id) no_oasoftware from ${stats_db_name}.result_contexts rp +CREATE TEMPORARY VIEW software_oa as +select rp.ri_initiative ri_initiative, count(distinct rp.id) no_oasoftware from result_contexts rp join ${stats_db_name}.software r on r.id=rp.id join ${stats_db_name}.result_instance ri on ri.id=r.id where (ri.accessright = 'Open Access' or ri.accessright = 'Embargo' or ri.accessright = 'Open Source') and cast(r.year as int)>2003 -group by rp.ri_initiative; +group by rp.ri_initiative; /*EOS*/ -CREATE TEMPORARY TABLE 
${stats_db_name}.allpubs as -select rp.ri_initiative ri_initiative, count(distinct rp.id) no_allpubs from ${stats_db_name}.result_contexts rp +CREATE TEMPORARY VIEW allpubs as +select rp.ri_initiative ri_initiative, count(distinct rp.id) no_allpubs from result_contexts rp join ${stats_db_name}.publication r on r.id=rp.id where cast(r.year as int)>2003 -group by rp.ri_initiative; +group by rp.ri_initiative; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.alldatasets as -select rp.ri_initiative ri_initiative, count(distinct rp.id) no_alldatasets from ${stats_db_name}.result_contexts rp +CREATE TEMPORARY VIEW alldatasets as +select rp.ri_initiative ri_initiative, count(distinct rp.id) no_alldatasets from result_contexts rp join ${stats_db_name}.dataset r on r.id=rp.id where cast(r.year as int)>2003 -group by rp.ri_initiative; +group by rp.ri_initiative; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allsoftware as -select rp.ri_initiative ri_initiative, count(distinct rp.id) no_allsoftware from ${stats_db_name}.result_contexts rp +CREATE TEMPORARY VIEW allsoftware as +select rp.ri_initiative ri_initiative, count(distinct rp.id) no_allsoftware from result_contexts rp join ${stats_db_name}.software r on r.id=rp.id where cast(r.year as int)>2003 -group by rp.ri_initiative; +group by rp.ri_initiative; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allpubsshare as -select pubs_oa.ri_initiative, pubs_oa.no_oapubs/allpubs.no_allpubs p from ${stats_db_name}.allpubs - join ${stats_db_name}.pubs_oa on allpubs.ri_initiative=pubs_oa.ri_initiative; +CREATE TEMPORARY VIEW allpubsshare as +select pubs_oa.ri_initiative, pubs_oa.no_oapubs/allpubs.no_allpubs p from allpubs + join pubs_oa on allpubs.ri_initiative=pubs_oa.ri_initiative; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.alldatasetssshare as +CREATE TEMPORARY VIEW alldatasetssshare as select datasets_oa.ri_initiative, datasets_oa.no_oadatasets/alldatasets.no_alldatasets d - from ${stats_db_name}.alldatasets - join 
${stats_db_name}.datasets_oa on alldatasets.ri_initiative=datasets_oa.ri_initiative; + from alldatasets + join datasets_oa on alldatasets.ri_initiative=datasets_oa.ri_initiative; /*EOS*/ -CREATE TEMPORARY TABLE ${stats_db_name}.allsoftwaresshare as +CREATE TEMPORARY VIEW allsoftwaresshare as select software_oa.ri_initiative, software_oa.no_oasoftware/allsoftware.no_allsoftware s - from ${stats_db_name}.allsoftware - join ${stats_db_name}.software_oa on allsoftware.ri_initiative=software_oa.ri_initiative; + from allsoftware + join software_oa on allsoftware.ri_initiative=software_oa.ri_initiative; /*EOS*/ -drop table ${stats_db_name}.indi_ris_openess purge; +drop table if exists ${stats_db_name}.indi_ris_openess purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_ris_openess stored as parquet as select allpubsshare.ri_initiative, (p+if(isnull(s),0,s)+if(isnull(d),0,d))/(1+(case when s is null then 0 else 1 end) +(case when d is null then 0 else 1 end)) - ris_openess FROM ${stats_db_name}.allpubsshare + ris_openess FROM allpubsshare left outer join (select ri_initiative,d from - ${stats_db_name}.alldatasetssshare) tmp1 + alldatasetssshare) tmp1 on tmp1.ri_initiative=allpubsshare.ri_initiative left outer join (select ri_initiative,s from - ${stats_db_name}.allsoftwaresshare) tmp2 - on tmp2.ri_initiative=allpubsshare.ri_initiative; + allsoftwaresshare) tmp2 + on tmp2.ri_initiative=allpubsshare.ri_initiative; /*EOS*/ -DROP TABLE ${stats_db_name}.result_contexts purge; -DROP TABLE ${stats_db_name}.pubs_oa purge; -DROP TABLE ${stats_db_name}.datasets_oa purge; -DROP TABLE ${stats_db_name}.software_oa purge; -DROP TABLE ${stats_db_name}.allpubs purge; -DROP TABLE ${stats_db_name}.alldatasets purge; -DROP TABLE ${stats_db_name}.allsoftware purge; -DROP TABLE ${stats_db_name}.allpubsshare purge; -DROP TABLE ${stats_db_name}.alldatasetssshare purge; -DROP TABLE ${stats_db_name}.allsoftwaresshare purge; +DROP VIEW result_contexts; /*EOS*/ +DROP VIEW pubs_oa; 
/*EOS*/ +DROP VIEW datasets_oa; /*EOS*/ +DROP VIEW software_oa; /*EOS*/ +DROP VIEW allpubs; /*EOS*/ +DROP VIEW alldatasets; /*EOS*/ +DROP VIEW allsoftware; /*EOS*/ +DROP VIEW allpubsshare; /*EOS*/ +DROP VIEW alldatasetssshare; /*EOS*/ +DROP VIEW allsoftwaresshare; /*EOS*/ --Funder Findability -drop table ${stats_db_name}.indi_funder_findable purge; +drop table if exists ${stats_db_name}.indi_funder_findable purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_funder_findable stored as parquet as with result_findable as @@ -1151,10 +1151,10 @@ with result_findable as group by p.funder) select allresults.funder, result_findable.no_result_findable/allresults.no_allresults funder_findable from allresults - join result_findable on result_findable.funder=allresults.funder; + join result_findable on result_findable.funder=allresults.funder; /*EOS*/ --RIs Findability -drop table ${stats_db_name}.indi_ris_findable purge; +drop table if exists ${stats_db_name}.indi_ris_findable purge; /*EOS*/ create table if not exists ${stats_db_name}.indi_ris_findable stored as parquet as with result_contexts as @@ -1175,7 +1175,7 @@ allresults as group by rc.ri_initiative) select allresults.ri_initiative, result_findable.no_result_findable/allresults.no_allresults ris_findable from allresults - join result_findable on result_findable.ri_initiative=allresults.ri_initiative; + join result_findable on result_findable.ri_initiative=allresults.ri_initiative; /*EOS*/ create table if not exists ${stats_db_name}.indi_pub_publicly_funded stored as parquet as with org_names_pids as @@ -1195,6 +1195,7 @@ and pf.publicly_funded='yes') foo) select distinct p.id, coalesce(publicly_funded, 0) as publicly_funded from ${stats_db_name}.publication p left outer join ( -select distinct ro.id, 1 as publicly_funded from result_organization ro +select distinct ro.id, 1 as publicly_funded from ${stats_db_name}.result_organization ro join ${stats_db_name}.organization o on o.id=ro.organization -join 
publicly_funded_orgs pfo on o.name=pfo.name) tmp on p.id=tmp.id; \ No newline at end of file +join publicly_funded_orgs pfo on o.name=pfo.name) tmp on p.id=tmp.id; /*EOS*/ + diff --git a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml index cbf97944d..709de6595 100644 --- a/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-stats-update/src/main/resources/eu/dnetlib/dhp/oa/graph/stats/oozie_app/workflow.xml @@ -64,6 +64,26 @@ hadoop_user_name user name of the wf owner + + + sparkSqlWarehouseDir + + + + sparkClusterOpts + --conf spark.network.timeout=600 --conf spark.extraListeners= --conf spark.sql.queryExecutionListeners= --conf spark.yarn.historyServer.address=http://iis-cdh5-test-m3.ocean.icm.edu.pl:18088 --conf spark.eventLog.dir=hdfs://nameservice1/user/spark/applicationHistory + spark cluster-wide options + + + sparkResourceOpts + --executor-memory=6G --conf spark.executor.memoryOverhead=4G --executor-cores=6 --driver-memory=8G --driver-cores=4 + spark resource options + + + sparkApplicationOpts + --conf spark.sql.shuffle.partitions=3840 + spark resource options + @@ -75,13 +95,21 @@ ${hive_metastore_uris} - hive.txn.timeout - ${hive_timeout} + hive.txn.timeout + ${hive_timeout} + + + hive.mapjoin.followby.gby.localtask.max.memory.usage + 0.80 + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + mapred.job.queue.name + analytics - - mapred.job.queue.name - analytics - @@ -129,164 +157,164 @@ ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name}
- + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - external_stats_db_name=${external_stats_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + external_stats_db_name=${external_stats_db_name} - +
- + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} - external_stats_db_name=${external_stats_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} + external_stats_db_name=${external_stats_db_name} - - +
+ ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} - + ${hive_jdbc_url} - stats_db_name=${stats_db_name} - openaire_db_name=${openaire_db_name} + stats_db_name=${stats_db_name} + openaire_db_name=${openaire_db_name} @@ -318,12 +346,23 @@ - - ${hive_jdbc_url} - - stats_db_name=${stats_db_name} - external_stats_db_name=${external_stats_db_name} - + + yarn + cluster + Step16-createIndicatorsTables + eu.dnetlib.dhp.oozie.RunSQLSparkJob + dhp-stats-update-${projectVersion}.jar + + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + ${sparkClusterOpts} + ${sparkResourceOpts} + ${sparkApplicationOpts} + + --hiveMetastoreUris${hive_metastore_uris} + --sqleu/dnetlib/dhp/oa/graph/stats/oozie_app/scripts/step16-createIndicatorsTables.sql + --stats_db_name${stats_db_name} + --external_stats_db_name${external_stats_db_name} + @@ -383,18 +422,18 @@
- - - - - - - - - - - - + + + + + + + + + + + + @@ -439,8 +478,8 @@ ${jobTracker} ${nameNode} copyDataToImpalaCluster.sh - - + + ${stats_db_name} ${monitor_db_name} ${observatory_db_name} @@ -501,4 +540,4 @@ - + \ No newline at end of file diff --git a/pom.xml b/pom.xml index 3fd351c1d..6ef320253 100644 --- a/pom.xml +++ b/pom.xml @@ -931,5 +931,25 @@ --> + + + + arm-silicon-mac + + + aarch64 + mac + + + + + + org.xerial.snappy + snappy-java + 1.1.8.4 + + + + \ No newline at end of file