diff --git a/.env b/.env new file mode 100644 index 0000000..9ed2036 --- /dev/null +++ b/.env @@ -0,0 +1,2 @@ +URL="http://snf-23385.ok-kno.grnetcloud.net" +BASE_URL="/" diff --git a/README.md b/README.md index 171382e..90cc01b 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,11 @@ $ git clone https://code-repo.d4science.org/D-Net/openaire-graph-docs.git ## Local installation and deployment +From https://docusaurus.io/docs/installation#requirements +> Node.js version 16.14 or above (which can be checked by running node -v) + + + To install the required packages use: ``` $ npm install diff --git a/docs/assets/badges/openaire-badge-1.png b/docs/assets/badges/openaire-badge-1.png new file mode 100644 index 0000000..c88ff30 Binary files /dev/null and b/docs/assets/badges/openaire-badge-1.png differ diff --git a/docs/assets/badges/openaire-badge-1.zip b/docs/assets/badges/openaire-badge-1.zip new file mode 100644 index 0000000..c253811 Binary files /dev/null and b/docs/assets/badges/openaire-badge-1.zip differ diff --git a/docs/assets/badges/openaire-badge-2.png b/docs/assets/badges/openaire-badge-2.png new file mode 100644 index 0000000..8be5e56 Binary files /dev/null and b/docs/assets/badges/openaire-badge-2.png differ diff --git a/docs/assets/badges/openaire-badge-2.zip b/docs/assets/badges/openaire-badge-2.zip new file mode 100644 index 0000000..2858237 Binary files /dev/null and b/docs/assets/badges/openaire-badge-2.zip differ diff --git a/docs/assets/badges/openaire-badge-3.png b/docs/assets/badges/openaire-badge-3.png new file mode 100644 index 0000000..3cda85f Binary files /dev/null and b/docs/assets/badges/openaire-badge-3.png differ diff --git a/docs/assets/badges/openaire-badge-3.zip b/docs/assets/badges/openaire-badge-3.zip new file mode 100644 index 0000000..cf3541e Binary files /dev/null and b/docs/assets/badges/openaire-badge-3.zip differ diff --git a/docs/assets/img/aggregation.png b/docs/assets/img/aggregation.png new file mode 100644 index 
0000000..e0f04e5 Binary files /dev/null and b/docs/assets/img/aggregation.png differ diff --git a/docs/assets/img/architecture.png b/docs/assets/img/architecture.png new file mode 100644 index 0000000..ad920fb Binary files /dev/null and b/docs/assets/img/architecture.png differ diff --git a/static/img/docs/data-model.png b/docs/assets/img/data-model.png similarity index 100% rename from static/img/docs/data-model.png rename to docs/assets/img/data-model.png diff --git a/docs/assets/img/decisiontree-dataset-orp.png b/docs/assets/img/decisiontree-dataset-orp.png new file mode 100644 index 0000000..e2184f0 Binary files /dev/null and b/docs/assets/img/decisiontree-dataset-orp.png differ diff --git a/docs/assets/img/decisiontree-organization.png b/docs/assets/img/decisiontree-organization.png new file mode 100644 index 0000000..7b71e12 Binary files /dev/null and b/docs/assets/img/decisiontree-organization.png differ diff --git a/docs/assets/img/decisiontree-publication.png b/docs/assets/img/decisiontree-publication.png new file mode 100644 index 0000000..50d173d Binary files /dev/null and b/docs/assets/img/decisiontree-publication.png differ diff --git a/docs/assets/img/decisiontree-software.png b/docs/assets/img/decisiontree-software.png new file mode 100644 index 0000000..7cdb108 Binary files /dev/null and b/docs/assets/img/decisiontree-software.png differ diff --git a/docs/assets/img/dedup-results.png b/docs/assets/img/dedup-results.png new file mode 100644 index 0000000..8ab84d1 Binary files /dev/null and b/docs/assets/img/dedup-results.png differ diff --git a/docs/assets/img/deduplication-workflow.png b/docs/assets/img/deduplication-workflow.png new file mode 100644 index 0000000..2c21487 Binary files /dev/null and b/docs/assets/img/deduplication-workflow.png differ diff --git a/docs/assets/img/enrichment/bulktagging_datasource.png b/docs/assets/img/enrichment/bulktagging_datasource.png new file mode 100644 index 0000000..2b78501 Binary files /dev/null and 
b/docs/assets/img/enrichment/bulktagging_datasource.png differ diff --git a/docs/assets/img/enrichment/bulktagging_selconstraints.png b/docs/assets/img/enrichment/bulktagging_selconstraints.png new file mode 100644 index 0000000..d1cff89 Binary files /dev/null and b/docs/assets/img/enrichment/bulktagging_selconstraints.png differ diff --git a/docs/assets/img/enrichment/bulktagging_subject.png b/docs/assets/img/enrichment/bulktagging_subject.png new file mode 100644 index 0000000..2ea815a Binary files /dev/null and b/docs/assets/img/enrichment/bulktagging_subject.png differ diff --git a/docs/assets/img/enrichment/bulktagging_zenodo.png b/docs/assets/img/enrichment/bulktagging_zenodo.png new file mode 100644 index 0000000..d8626f1 Binary files /dev/null and b/docs/assets/img/enrichment/bulktagging_zenodo.png differ diff --git a/docs/assets/img/enrichment/organization_tree.png b/docs/assets/img/enrichment/organization_tree.png new file mode 100644 index 0000000..c4d3df3 Binary files /dev/null and b/docs/assets/img/enrichment/organization_tree.png differ diff --git a/docs/assets/img/enrichment/propagation_affiliationistrepo.png b/docs/assets/img/enrichment/propagation_affiliationistrepo.png new file mode 100644 index 0000000..6961a9a Binary files /dev/null and b/docs/assets/img/enrichment/propagation_affiliationistrepo.png differ diff --git a/docs/assets/img/enrichment/propagation_country.png b/docs/assets/img/enrichment/propagation_country.png new file mode 100644 index 0000000..8f90911 Binary files /dev/null and b/docs/assets/img/enrichment/propagation_country.png differ diff --git a/docs/assets/img/enrichment/propagation_orcid.png b/docs/assets/img/enrichment/propagation_orcid.png new file mode 100644 index 0000000..49a230e Binary files /dev/null and b/docs/assets/img/enrichment/propagation_orcid.png differ diff --git a/docs/assets/img/enrichment/propagation_organizationsemrel.png b/docs/assets/img/enrichment/propagation_organizationsemrel.png new file mode 100644 
index 0000000..caef457 Binary files /dev/null and b/docs/assets/img/enrichment/propagation_organizationsemrel.png differ diff --git a/docs/assets/img/enrichment/propagation_resulttocommunitythroughorganization.png b/docs/assets/img/enrichment/propagation_resulttocommunitythroughorganization.png new file mode 100644 index 0000000..18dcc42 Binary files /dev/null and b/docs/assets/img/enrichment/propagation_resulttocommunitythroughorganization.png differ diff --git a/docs/assets/img/enrichment/propagation_resulttocommunitythroughsemrel.png b/docs/assets/img/enrichment/propagation_resulttocommunitythroughsemrel.png new file mode 100644 index 0000000..9c6e0eb Binary files /dev/null and b/docs/assets/img/enrichment/propagation_resulttocommunitythroughsemrel.png differ diff --git a/docs/assets/img/enrichment/propagation_resulttoproject.png b/docs/assets/img/enrichment/propagation_resulttoproject.png new file mode 100644 index 0000000..4960b42 Binary files /dev/null and b/docs/assets/img/enrichment/propagation_resulttoproject.png differ diff --git a/docs/data-model/data-model.md b/docs/data-model/data-model.md index 3a43be6..4718a5f 100644 --- a/docs/data-model/data-model.md +++ b/docs/data-model/data-model.md @@ -1,11 +1,11 @@ # Data model -The OpenAIRE Graph comprises several types of [entities](../category/entities) and [relationships](./relationships) among them. +The OpenAIRE Research Graph comprises several types of [entities](../category/entities) and [relationships](./relationships) among them. -The latest version of the JSON schema can be found on [Bulk downloads](../download). +The latest version of the JSON schema can be found on the [Downloads](../downloads/full-graph) section.
- +
The figure above, presents the graph's data model. @@ -20,6 +20,6 @@ responsible for operating data sources or consisting the affiliations of Product :::note Further reading -A detailed report on the OpenAIRE Graph Data Model can be found on [Zenodo](https://zenodo.org/record/2643199). +A detailed report on the OpenAIRE Research Graph Data Model can be found on [Zenodo](https://zenodo.org/record/2643199). ::: diff --git a/docs/data-model/entities/_category_.json b/docs/data-model/entities/_category_.json index 8161451..1740b21 100644 --- a/docs/data-model/entities/_category_.json +++ b/docs/data-model/entities/_category_.json @@ -3,6 +3,6 @@ "position": 1, "link": { "type": "generated-index", - "description": "The main entities of the OpenAIRE Graph are listed below." + "description": "The main entities of the OpenAIRE Research Graph are listed below." } } \ No newline at end of file diff --git a/docs/data-model/pids-and-identifiers.md b/docs/data-model/pids-and-identifiers.md index 5156beb..17365b3 100644 --- a/docs/data-model/pids-and-identifiers.md +++ b/docs/data-model/pids-and-identifiers.md @@ -1,8 +1,8 @@ # PIDs and identifiers -Ensuring the stability of the records and their identifiers in the OpenAIRE Graph is a challenge. -Data sources from which OpenAIRE collects metadata records are subject to constant variations: records in repositories vary in content, -original IDs, and PIDs, may disappear or reappear, and the same holds for the data source itself or the metadata collection it exposes. +One of the challenges towards the stability of the contents in the OpenAIRE Research Graph consists of making its identifiers and records stable over time. +The barriers to this scenario are many, as the Graph keeps a map of data sources that is subject to constant variations: records in repositories vary in content, +original IDs, and PIDs, may disappear or reappear, and the same holds for the repository or the metadata collection it exposes. 
Not only, but the mappings applied to the original contents may also change and improve over time to catch up with the changes in the input records. ## PID Authorities @@ -70,5 +70,5 @@ Currently, the following data sources are used as "PID authorities": | arXiv | `arXiv_______` | arXiv.org e-Print Archive | | handle | `handle______` | any repository | -OpenAIRE also perform duplicate identification (see the [dedicated section for details](../../data-provision/deduplication/)). +OpenAIRE also perform duplicate identification (see the [dedicated section for details](/data-provision/deduplication)). All duplicates are **merged** together in a **representative record** which must be assigned a dedicated OpenAIRE identifier (i.e. it cannot have the identifier of one of the aggregated record). diff --git a/docs/data-provision/aggregation/aggregation.md b/docs/data-provision/aggregation/aggregation.md index de3ae2c..94c82f2 100644 --- a/docs/data-provision/aggregation/aggregation.md +++ b/docs/data-provision/aggregation/aggregation.md @@ -4,17 +4,17 @@ sidebar_position: 1 # Aggregation -OpenAIRE materializes an open, participatory research graph (the OpenAIRE Graph) where products of the research life-cycle (e.g. scientific literature, research data, project, software) are semantically linked to each other and carry information about their access rights (i.e. if they are Open Access, Restricted, Embargoed, or Closed) and the sources from which they have been collected and where they are hosted. The OpenAIRE Graph is materialised via a set of autonomic, orchestrated workflows operating in a regimen of continuous data aggregation and integration. [1] +OpenAIRE materializes an open, participatory research graph (the OpenAIRE Research Graph) where products of the research life-cycle (e.g. scientific literature, research data, project, software) are semantically linked to each other and carry information about their access rights (i.e. 
if they are Open Access, Restricted, Embargoed, or Closed) and the sources from which they have been collected and where they are hosted. The OpenAIRE Research Graph is materialised via a set of autonomic, orchestrated workflows operating in a regimen of continuous data aggregation and integration. [1] ## What does OpenAIRE collect? -OpenAIRE aggregates metadata records describing objects of the research life-cycle from content providers compliant to the [OpenAIRE guidelines](https://guidelines.openaire.eu/) and from entity registries (i.e. data sources offering authoritative lists of entities, like [OpenDOAR](https://v2.sherpa.ac.uk/opendoar/), [re3data](https://www.re3data.org/), [DOAJ](https://doaj.org/), and various funder databases). After collection, metadata are transformed according to the OpenAIRE internal metadata model, which is used to generate the final OpenAIRE Graph, accessible from the [OpenAIRE EXPLORE portal](https://explore.openaire.eu) and the [APIs](https://graph.openaire.eu/develop/). +OpenAIRE aggregates metadata records describing objects of the research life-cycle from content providers compliant to the [OpenAIRE guidelines](https://guidelines.openaire.eu/) and from entity registries (i.e. data sources offering authoritative lists of entities, like [OpenDOAR](https://v2.sherpa.ac.uk/opendoar/), [re3data](https://www.re3data.org/), [DOAJ](https://doaj.org/), and various funder databases). After collection, metadata are transformed according to the OpenAIRE internal metadata model, which is used to generate the final OpenAIRE Research Graph, accessible from the [OpenAIRE EXPLORE portal](https://explore.openaire.eu) and the [APIs](https://graph.openaire.eu/develop/). The transformation process includes the application of cleaning functions whose goal is to ensure that values are harmonised according to a common format (e.g. dates as YYYY-MM-dd) and, whenever applicable, to a common controlled vocabulary. 
The controlled vocabularies used for cleansing are accessible at [api.openaire.eu/vocabularies](https://api.openaire.eu/vocabularies/). Each vocabulary features a set of controlled terms, each with one code, one label, and a set of synonyms. If a synonym is found as field value, the value is updated with the corresponding term. -Also, the OpenAIRE Graph is extended with other relevant scholarly communication sources that do not follow the OpenAIRE Guidelines and/or are too large to be integrated via the “normal” aggregation mechanism: DOIBoost (which merges Crossref, ORCID, Microsoft Academic Graph, and Unpaywall). +In addition, the OpenAIRE Research Graph is extended with other relevant scholarly communication sources that need special handling, either because they do not strictly follow the OpenAIRE Guidelines or due to the vast amount of data they offer (e.g. DOIBoost, which merges Crossref, ORCID, Microsoft Academic Graph, and Unpaywall).- +
The OpenAIRE aggregation system collects information about objects of the research life-cycle compliant to the [OpenAIRE acquisition policy](https://www.openaire.eu/content-acquisition-policy) from [different types of data sources](https://explore.openaire.eu/search/find/dataproviders): @@ -30,9 +30,9 @@ Relationships between objects are collected from the data sources, but also auto ## What kind of data sources are in OpenAIRE? -Objects and relationships in the OpenAIRE Graph are extracted from information packages, i.e. metadata records, collected from data sources of the following kinds: +Objects and relationships in the OpenAIRE Research Graph are extracted from information packages, i.e. metadata records, collected from data sources of the following kinds: -- *Institutional or thematic repositories*: Information systems where scientists upload the bibliographic metadata and full-texts of their articles, due to obligations from their organization or due to community practices (e.g. ArXiv, Europe PMC); +- *Literature, Institutional and thematic repositories*: Information systems where scientists upload the bibliographic metadata and full-texts of their articles, due to obligations from their organization or due to community practices (e.g. ArXiv, Europe PMC); - *Open Access Publishers and journals*: Information system of open access publishers or relative journals, which offer bibliographic metadata and PDFs of their published articles; - *Data archives*: Information systems where scientists deposit descriptive metadata and files about their research data (also known as scientific data, datasets, etc.).; - *Hybrid repositories/archives*: information systems where scientists deposit metadata and file of any kind of scientific products, incuding scientific literature, research data and research software (e.g. 
Zenodo) @@ -46,11 +46,13 @@ Objects and relationships in the OpenAIRE Graph are extracted from information p OpenAIRE collects metadata records describing objects of the research life-cycle from content providers compliant to the OpenAIRE guidelines and from entity registries (i.e. data sources offering authoritative lists of entities, like OpenDOAR, re3data, DOAJ, and funder databases). The OpenAIRE aggregator collects metadata records in the majority of cases via [OAI-PMH](https://www.openarchives.org/pmh/), but also supports other standard exchange protocols like FTP(S), SFTP, and some RESTful API. +The whole list of available and used collectors could be found in the [RedMine Wiki - API Protocols](https://support.openaire.eu/projects/openaire/wiki/API_protocols) For additional details about the aggregation workflows, please refer to [2]. + ## References -[1] Manghi P. et al. (2014) "The D-NET software toolkit: A framework for the realization, maintenance, and operation of aggregative infrastructures", Program, Vol. 48 Issue: 4, pp.322-354, [10.1108/PROG-08-2013-0045](https://doi.org/10.1108/PROG-08-2013-0045) +[1] Manghi, P., Artini, M., Atzori, C., Bardi, A., Mannocci, A., La Bruzzo, S., Candela, L., Castelli, D. and Pagano, P. (2014), “The D-NET software toolkit: A framework for the realization, maintenance, and operation of aggregative infrastructures”, Program: electronic library and information systems, Vol. 48 No. 4, pp. 322-354. [doi:10.1108/prog-08-2013-0045](http://doi.org/10.1108/prog-08-2013-0045) -[2] Atzori, Claudio, Bardi, Alessia, Manghi, Paolo, & Mannocci, Andrea. (2017). The OpenAIRE workflows for data management. Zenodo. [10.5281/zenodo.996006](http://doi.org/10.5281/zenodo.996006) +[2] Atzori, C., Bardi, A., Manghi, P., & Mannocci, A. (2017, January). "The OpenAIRE workflows for data management". In Italian Research Conference on Digital Libraries (pp. 95-107). Springer, Cham. 
[doi:10.1007/978-3-319-68130-6_8](https://doi.org/10.1007/978-3-319-68130-6_8) \ No newline at end of file diff --git a/docs/data-provision/aggregation/compatible-sources.md b/docs/data-provision/aggregation/compatible-sources.md new file mode 100644 index 0000000..48d831e --- /dev/null +++ b/docs/data-provision/aggregation/compatible-sources.md @@ -0,0 +1,11 @@ +--- +sidebar_position: 1 +--- + +# OpenAIRE compatible sources + +The OpenAIRE aggregator collects metadata records from content providers compliant to the OpenAIRE guidelines. + +The OpenAIRE Guidelines help repository managers expose publications, datasets and CRIS metadata via the OAI-PMH protocol in order to integrate with OpenAIRE infrastructure. + +You can find more information in https://guidelines.openaire.eu/en/latest/ \ No newline at end of file diff --git a/docs/data-provision/aggregation/datacite.md b/docs/data-provision/aggregation/non-compatible-sources/datacite.md similarity index 99% rename from docs/data-provision/aggregation/datacite.md rename to docs/data-provision/aggregation/non-compatible-sources/datacite.md index e1fd166..463ab5f 100644 --- a/docs/data-provision/aggregation/datacite.md +++ b/docs/data-provision/aggregation/non-compatible-sources/datacite.md @@ -33,7 +33,7 @@ The metadata collection process identifies the most recent record date available ### Entity Mapping -The table below describes the mapping from the XML baseline records to the OpenAIRE Graph dump format. +The table below describes the mapping from the XML baseline records to the OpenAIRE Research Graph dump format. 
| OpenAIRE Result field path | Datacite record JSON path | # Notes | |--------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| diff --git a/docs/data-provision/aggregation/doiboost.md b/docs/data-provision/aggregation/non-compatible-sources/doiboost.md similarity index 99% rename from docs/data-provision/aggregation/doiboost.md rename to docs/data-provision/aggregation/non-compatible-sources/doiboost.md index e79e384..709e9e0 100644 --- a/docs/data-provision/aggregation/doiboost.md +++ b/docs/data-provision/aggregation/non-compatible-sources/doiboost.md @@ -68,7 +68,7 @@ Records in Crossref are ruled out according to the following criteria Records with `type=dataset` are mapped into OpenAIRE results of type dataset. All others are mapped as OpenAIRE results of type publication. -### Mapping Crossref properties into the OpenAIRE Graph +### Mapping Crossref properties into the OpenAIRE Research Graph Properties in OpenAIRE results are set based on the logic described in the following table: @@ -222,7 +222,7 @@ Miriam will modify the process to ensure that: * Only papers with DOI are considered * Since for the same DOI we have multiple version of item with different MAG PaperId, we only take one per DOI (the last one we process). We call this dataset `Papers_distinct` -When mapping MAG records to the OpenAIRE Graph, we consider the following MAG tables: +When mapping MAG records to the OpenAIRE Research Graph, we consider the following MAG tables: * `PaperAbstractsInvertedIndex`: for the paper abstracts * `Authors`: for the authors. 
The MAG data is pre-processed by grouping authors by PaperId * `Affiliations` and `PaperAuthorAffiliations`: to generate links between publications and organisations diff --git a/docs/data-provision/aggregation/ebi.md b/docs/data-provision/aggregation/non-compatible-sources/ebi.md similarity index 99% rename from docs/data-provision/aggregation/ebi.md rename to docs/data-provision/aggregation/non-compatible-sources/ebi.md index f5abf7a..7641ec2 100644 --- a/docs/data-provision/aggregation/ebi.md +++ b/docs/data-provision/aggregation/non-compatible-sources/ebi.md @@ -69,7 +69,7 @@ curl -s "https://www.ebi.ac.uk/europepmc/webservices/rest/MED/33024307/datalinks ``` ## Mapping -The table below describes the mapping from the EBI links records to the OpenAIRE Graph dump format. +The table below describes the mapping from the EBI links records to the OpenAIRE Research Graph dump format. We filter all the target links with pid type **ena**, **pdb** or **uniprot** For each target we construct a Bioentity with the following mapping diff --git a/docs/data-provision/aggregation/pubmed.md b/docs/data-provision/aggregation/non-compatible-sources/pubmed.md similarity index 99% rename from docs/data-provision/aggregation/pubmed.md rename to docs/data-provision/aggregation/non-compatible-sources/pubmed.md index a6df81d..8e5cec1 100644 --- a/docs/data-provision/aggregation/pubmed.md +++ b/docs/data-provision/aggregation/non-compatible-sources/pubmed.md @@ -12,7 +12,7 @@ Pubmed exposes an entry point FTP with all the updates for each one. [ftp baseli ## Entity Mapping -The table below describes the mapping from the XML baseline records to the OpenAIRE Graph dump format. +The table below describes the mapping from the XML baseline records to the OpenAIRE Research Graph dump format. 
| OpenAIRE Result field path | PubMed record field xpath | Notes | |--------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| diff --git a/docs/data-provision/post-cleaning.md b/docs/data-provision/cleaning.md similarity index 68% rename from docs/data-provision/post-cleaning.md rename to docs/data-provision/cleaning.md index b223fb4..e920026 100644 --- a/docs/data-provision/post-cleaning.md +++ b/docs/data-provision/cleaning.md @@ -1,15 +1,10 @@ ---- -sidebar_position: 4 ---- +# Cleaning -# Post cleaning -At the very end of the processing pipeline, a step is dedicated to perform cleaning operations aimed at improving the overall quality of the data. -The output of this final cleansing step is the final version of the OpenAIRE Graph. - -## Vocabulary based cleaning + The aggregation processes run independently one from another and continuously. Each aggregation process, depending on the characteristics of the records exposed by the data source, makes use of one or more vocabularies to harmonise the values available in a given field. +In this page, we describe the *vocabulary-based cleaning* operation performed to harmonise the data of the different data sources. A vocabulary is a data structure that defines a list of terms, and for each term defines a list of synonyms: ```xml @@ -39,17 +34,4 @@ The content of the vocabularies can be accessed on [api.openaire.eu/vocabularies Given a value provided in the original records, the cleaning process looks for a synonym and, when found, resolves the corresponding term which is used in turn to build the cleaned record. 
Each aggregation process applies vocabularies according to their definitions in a given moment of time, however, it could be the case that a vocabulary changes after the aggregation of one data source has finished, thus the aggregated content does not reflect the current status of the controlled vocabularies. -In addition, the integration of ScholeXplorer and DOIBoost and some enrichment processes applied on the raw and on the de-duplicated graph may introduce values that do not comply with the current status of the OpenAIRE controlled vocabularies. For these reasons, we included a final step of cleansing at the end of the workflow materialisation. - -## Filtering - -Bibliographic records that do not meet minimal requirements for being part of the OpenAIRE Graph are eliminated during this phase. -Currently, the only criteria applied horizontally to the entire graph aims at excluding scientific results whose title is not meaningful for citation purposes. -Then, different criteria are applied in the pre-processing of specific sub-collections: - -* [Crossref filtering](/data-provision/aggregation/doiboost#crossref-filtering) - -## Country cleaning - -This phase is responsible for removing the country information from result records that match specific criteria. The need for this phase is driven by the fact that some datasources, although referred of national pertinence, they contain material that is not always related to the given country. - +In addition, the integration of ScholeXplorer and DOIBoost and some enrichment processes applied on the raw and on the de-duplicated graph may introduce values that do not comply with the current status of the OpenAIRE controlled vocabularies. For these reasons, we included a final step of cleansing at the end of the workflow materialisation. 
\ No newline at end of file diff --git a/docs/data-provision/data-provision.md b/docs/data-provision/data-provision.md index ea103c3..27114ec 100644 --- a/docs/data-provision/data-provision.md +++ b/docs/data-provision/data-provision.md @@ -1,7 +1,8 @@ -# Data provision +# Graph production workflow OpenAIRE collects metadata records from more than 70K scholarly communication sources from all over the world, including Open Access institutional repositories, data archives, journals. All the metadata records (i.e. descriptions of research products) are put together in a data lake, together with records from Crossref, Unpaywall, ORCID, Grid.ac, and information about projects provided by national and international funders. Dedicated inference algorithms applied to metadata and to the full-texts of Open Access publications enrich the content of the data lake with links between research results and projects, author affiliations, subject classification, links to entries from domain-specific databases. Duplicated organisations and results are identified and merged together to obtain an open, trusted, public resource enabling explorations of the scholarly communication landscape like never before.- +
+ diff --git a/docs/data-provision/deduction-and-propagation/bulk-tagging.md b/docs/data-provision/deduction-and-propagation/bulk-tagging.md new file mode 100644 index 0000000..f52188e --- /dev/null +++ b/docs/data-provision/deduction-and-propagation/bulk-tagging.md @@ -0,0 +1,37 @@ +# Deduction + +The Deduction process (also known as “bulk tagging”) enriches each record with new information that can be derived from the existing property values. + +This process is used to associate results to community/research initiatives that are part of OpenAIRE. +As of November 2022, three procedures are in place to relate a research product to a research initiative, infrastructure (RI) or community (RC) based on: + +* subjects: it is possible to specify a list of subjects that are relevant for the RC/RI. Every time one of the subjects is found among the subjects of a result, the result is linked to the RC/RI. + ++ +
+ + +* data sources: it is possible to list a set of data sources relevant for the RC/RI. All the results collected from these data sources will be linked to the RC/RI ++ +
+ + When only some results collected from a datasource are relevant for the RC/RI, it is possible to specify a set of selection constraints (SC) that have to be verified before linking the result to the +community. The selection constraint has the form SC = S1 or S2 or ... or Sn. The generic Si has the form Si = si1 and si2 and ... and sin and each sij is a condition on a specific field of the result. The set of fields that can be specified is F={title, author, contributor, description, orcid}, +while the set of conditions can be among V={contains, equals, not_contains, not_equals, contains_ignorecase, equals_ignorecase, not_contains_ignorecase, not_equal_ignorecase}, and the value is free text. +A possible selection criterion is: “All the products whose contributor contains DARIAH” + ++ +
+ +* Zenodo community: it is possible to list a set of Zenodo communities relevant for the RC/RI. All the products collected from the listed Zenodo communities are linked to the RC/RI + + ++ +
+ + +The list of subjects, Zenodo communities and data sources used to enrich the products is defined by the managers of the community gateway or infrastructure monitoring dashboard associated with the RC/RI. diff --git a/docs/data-provision/deduction-and-propagation/propagation.md b/docs/data-provision/deduction-and-propagation/propagation.md new file mode 100644 index 0000000..79f0902 --- /dev/null +++ b/docs/data-provision/deduction-and-propagation/propagation.md @@ -0,0 +1,55 @@ +# Propagation + +This process enriches the graph by adding new links and/or new properties. The new information is added by exploiting existing semantic +relationships and values between the involved entities. + +As of November 2022, the following procedures are in place: + +* Country propagation: updates the property “country” of a result. This happens when the result is collected from an institutional datasource or when the datasource hosting the result is inserted in a whitelist. For all the results whose hosting datasource verifies one of the conditions above, the country of the organization providing the datasource is added to the country of the result: e.g. a publication collected from an institutional repository maintained by an Italian university will be enriched with the property “country = IT”. ++ +
+ +* Project propagation: adds a "isProducedBy" relationship (and its inverse) between a Project P and Result R1, if R1 has a strong semantic relationship with another Result R2 and P produces R2: e.g. publication linked to project P “is supplemented by” a dataset D. Dataset D will get the link to project P. The relationships considered for this procedure are “isSupplementedBy” and “isSupplementTo”. ++ +
+* Result to RC/RI through organization propagation. The manager of the RC/RI can specify a set of organizations whose products are relevant for the +community. +Each result having an affiliation relation with at least one organization relevant for the RC/RI will be linked to it. ++ +
+ +* Result to RC/RI through semantic relation: extends the set of products linked to a RC/RI by exploiting strong semantic relationships between the results; +e.g. if a result R1 is associated to the community C and is supplemented by a result R2 then the result R2 will be linked to the community. The relationships considered for this procedure are “isSupplementedBy” and “supplements”. ++ +
+* ORCID identifiers to result through semantic relation. This propagation enriches the results by adding ORCID identifiers to authors. The added ORCID will be marked as "potential" since they have been inserted through propagation. +The process considers the set of overlapping authors between results (R1 and R2) linked with a strong semantic relationship (IsSupplementedBy, IsSupplementTo). +For each author A in the overlapping set, if R1 provides the ORCID value for A and R2 does not, then the author A in R2 will be enriched with the information of the ORCID found in R1. + ++ +
+ +* Affiliation to organization through institutional repository. This propagation adds one "hasAuthorInstitution" relationship (and its inverse) +between a Result R and an Organization O, +if R was collected from a datasource D with type institutional repository, and D was provided by O. ++ +
+ +* Affiliation to organization through semantic relation. This propagation adds one "hasAuthorInstitution" relationship (and its inverse) between a +Result R and an Organization O, +if R has an affiliation relation with an organization O1 that is in relation "isChildOf" with O. + ++ +
+ The algorithm exploits only the organization leaves that are in an "IsChildOf" relation with another organization. So far, only a single step is performed. ++ +
\ No newline at end of file diff --git a/docs/data-provision/deduplication/deduplication.md b/docs/data-provision/deduplication/deduplication.md index 53d6906..8fb118a 100644 --- a/docs/data-provision/deduplication/deduplication.md +++ b/docs/data-provision/deduplication/deduplication.md @@ -10,7 +10,7 @@ The deduplication process can be divided into three different phases: * Duplicates grouping (transitive closure)- +
### Candidate identification (clustering) diff --git a/docs/data-provision/deduplication/organizations.md b/docs/data-provision/deduplication/organizations.md index a0b029b..488489f 100644 --- a/docs/data-provision/deduplication/organizations.md +++ b/docs/data-provision/deduplication/organizations.md @@ -14,7 +14,7 @@ The data curation activity is twofold, on one end pivots around the disambiguati Duplicates among organizations are therefore managed through three different stages: * *Creation of Suggestions*: executes an automatic workflow that performs the deduplication and prepare new suggestions for the curators to be processed; * *Curation*: manual editing of the organization records performed by the data curators; - * *Creation of Representative Organizations*: executes an automatic workflow that creates curated organizations and exposes them on the OpenAIRE Graph by using the curators' feedback from the OpenOrgs underlying database. + * *Creation of Representative Organizations*: executes an automatic workflow that creates curated organizations and exposes them on the OpenAIRE Research Graph by using the curators' feedback from the OpenOrgs underlying database. The next sections describe the above mentioned stages. @@ -43,7 +43,7 @@ The comparison goes through the following decision tree: 5. *legalname check*: comparison of the normalized `legalnames` with the `Jaro-Winkler` distance to determine if it is higher than `0.9`. If so, a similarity relation is drawn. Otherwise, no similarity relation is drawn.- +
[//]: # (Link to the image: https://docs.google.com/drawings/d/1YKInGGtHu09QG4pT2gRLEum4LxU82d4nKkvGNvRQmrg/edit?usp=sharing) @@ -61,7 +61,7 @@ Note that if a curator does not provide a feedback on a similarity relation sugg ### Creation of Representative Organizations -This stage executes an automatic workflow that faces the *duplicates grouping* stage to create representative organizations and to update them on the OpenAIRE Graph. Such organizations are obtained via transitive closure and the relations used comes from the curators' feedback gathered on the OpenOrgs underlying Database. +This stage executes an automatic workflow that faces the *duplicates grouping* stage to create representative organizations and to update them on the OpenAIRE Research Graph. Such organizations are obtained via transitive closure and the relations used comes from the curators' feedback gathered on the OpenOrgs underlying Database. #### Duplicates grouping (transitive closure) diff --git a/docs/data-provision/deduplication/research-products.md b/docs/data-provision/deduplication/research-products.md index 3000e24..4d68c25 100644 --- a/docs/data-provision/deduplication/research-products.md +++ b/docs/data-provision/deduplication/research-products.md @@ -34,7 +34,7 @@ The comparison goes through different stages: 5. *strong check*: comparison composed by three substages involving the (i) comparison of the author list sizes and the version of the record to determine if they are coherent, (ii) comparison of the record titles with the Levenshtein distance to determine if it is higher than 0.99, (iii) "smart" comparison of the author lists to check if common authors are more than 60%.- +
[//]: # (Link to the image: https://docs.google.com/drawings/d/19SIilTp1vukw6STMZuPMdc0pv0ODYCiOxP7OU3iPWK8/edit?usp=sharing) @@ -47,7 +47,7 @@ The comparison goes through different stages: 3. *strong check*: comparison of the record titles with Levenshtein distance. If the measure is above 0.99, then the similarity relation is drawn- +
[//]: # (Link to the image: https://docs.google.com/drawings/d/19gd1-GTOEEo6awMObGRkYFhpAlO_38mfbDFFX0HAkuo/edit?usp=sharing) @@ -57,7 +57,7 @@ For each pair of datasets or other types of research products in a cluster the s The decision tree is almost identical to the publication decision tree, with the only exception of the *instance type check* stage. Since such type of record does not have a relatable instance type, the check is not performed and the decision tree node is skipped.- +
[//]: # (Link to the image: https://docs.google.com/drawings/d/1uBa7Bw2KwBRDUYIfyRr_Keol7UOeyvMNN7MPXYLg4qw/edit?usp=sharing) diff --git a/docs/data-provision/enrichment/_category_.json b/docs/data-provision/enrichment-by-mining/_category_.json similarity index 100% rename from docs/data-provision/enrichment/_category_.json rename to docs/data-provision/enrichment-by-mining/_category_.json diff --git a/docs/data-provision/enrichment/acks.md b/docs/data-provision/enrichment-by-mining/acks.md similarity index 79% rename from docs/data-provision/enrichment/acks.md rename to docs/data-provision/enrichment-by-mining/acks.md index 903e0b4..eed8cb1 100644 --- a/docs/data-provision/enrichment/acks.md +++ b/docs/data-provision/enrichment-by-mining/acks.md @@ -4,8 +4,7 @@ sidebar_position: 3 # Extraction of acknowledged concepts -***Short description:*** -Scans the plaintexts of publications for acknowledged concepts, including grant identifiers (projects) of funders, accession numbers of bioetities, EPO patent mentions, as well as custom concepts that can link research objects to specific research communities and initiatives in OpenAIRE. +***Short description:*** Scans the plaintexts of publications for acknowledged concepts, including grant identifiers (projects) of funders, accession numbers of bioetities, EPO patent mentions, as well as custom concepts that can link research objects to specific research communities and initiatives in OpenAIRE. ***Algorithmic details:*** The algorithm processes the publication's fulltext and extracts references to acknowledged concepts. It applies pattern matching and string join between the fulltext and a target database which contains the title, the acronym and the identifier of the searched concept. 
diff --git a/docs/data-provision/enrichment/affiliation_matching.md b/docs/data-provision/enrichment-by-mining/affiliation_matching.md similarity index 96% rename from docs/data-provision/enrichment/affiliation_matching.md rename to docs/data-provision/enrichment-by-mining/affiliation_matching.md index fb2ce11..539e51b 100644 --- a/docs/data-provision/enrichment/affiliation_matching.md +++ b/docs/data-provision/enrichment-by-mining/affiliation_matching.md @@ -4,8 +4,7 @@ sidebar_position: 1 # Affiliation matching -***Short description:*** -The goal of the affiliation matching module is to match affiliations extracted from the pdf and xml documents with organizations from the OpenAIRE organization database. +***Short description:*** The goal of the affiliation matching module is to match affiliations extracted from the pdf and xml documents with organizations from the OpenAIRE organization database. ***Algorithmic details:*** diff --git a/docs/data-provision/enrichment/citation_matching.md b/docs/data-provision/enrichment-by-mining/citation_matching.md similarity index 87% rename from docs/data-provision/enrichment/citation_matching.md rename to docs/data-provision/enrichment-by-mining/citation_matching.md index 7cf56db..01fcf37 100644 --- a/docs/data-provision/enrichment/citation_matching.md +++ b/docs/data-provision/enrichment-by-mining/citation_matching.md @@ -1,7 +1,6 @@ # Citation matching -***Short description:*** -During a citation matching task, bibliographic entries are linked to the documents that they reference. The citation matching module - one of the modules of the Information Inference Service (IIS) - receives as an input a list of documents accompanied by their metadata and bibliography. Among them, it discovers links described above and returns them as a list. 
In this document we shall evaluate if the module has been properly integrated with the whole +***Short description:*** During a citation matching task, bibliographic entries are linked to the documents that they reference. The citation matching module - one of the modules of the Information Inference Service (IIS) - receives as an input a list of documents accompanied by their metadata and bibliography. Among them, it discovers links described above and returns them as a list. In this document we shall evaluate if the module has been properly integrated with the whole system and assess the accuracy of the algorithm used. It is worth mentioning that the implemented algorithm has been described in detail in arXiv:1303.6906 [cs.IR]1. However, in the referenced paper the algorithm was tested on small datasets, but here we will focus on larger datasets, which are expected to be analysed by the system in the production environment. ***Algorithmic details:*** diff --git a/docs/data-provision/enrichment/cites.md b/docs/data-provision/enrichment-by-mining/cites.md similarity index 90% rename from docs/data-provision/enrichment/cites.md rename to docs/data-provision/enrichment-by-mining/cites.md index 9a45946..f7d8158 100644 --- a/docs/data-provision/enrichment/cites.md +++ b/docs/data-provision/enrichment-by-mining/cites.md @@ -4,8 +4,7 @@ sidebar_position: 4 # Extraction of cited concepts -***Short description:*** -Scans the plaintexts of publications for cited concepts, currently for references to datasets and software URIs. +***Short description:*** Scans the plaintexts of publications for cited concepts, currently for references to datasets and software URIs. ***Algorithmic details:*** The algorithm extracts citations to specific datasets and software. It extracts the citation section of a publication's fulltext and applies string matching against a target database which includes an inverted index with dataset/software titles, urls and other metadata. 
diff --git a/docs/data-provision/enrichment/classifies.md b/docs/data-provision/enrichment-by-mining/classifies.md similarity index 100% rename from docs/data-provision/enrichment/classifies.md rename to docs/data-provision/enrichment-by-mining/classifies.md diff --git a/docs/data-provision/enrichment/documents_similarity.md b/docs/data-provision/enrichment-by-mining/documents_similarity.md similarity index 90% rename from docs/data-provision/enrichment/documents_similarity.md rename to docs/data-provision/enrichment-by-mining/documents_similarity.md index c67700c..1e02b95 100644 --- a/docs/data-provision/enrichment/documents_similarity.md +++ b/docs/data-provision/enrichment-by-mining/documents_similarity.md @@ -1,7 +1,6 @@ # Documents similarity -***Short description:*** -Document similarity module is responsible for finding similar documents among the ones available in the OpenAIRE Information Space. It produces "similarity" links between the documents stored in the OpenAIRE Information Space. Each link has a similarity score from [0,1] range assigned; it is expected that the higher the score, the more similar are the documents with respect to their content. +***Short description:*** Document similarity module is responsible for finding similar documents among the ones available in the OpenAIRE Information Space. It produces "similarity" links between the documents stored in the OpenAIRE Information Space. Each link has a similarity score from [0,1] range assigned; it is expected that the higher the score, the more similar are the documents with respect to their content. ***Algorithmic details:*** The similarity between two documents is expressed as the similarity between weights of their common terms (i.e., words being reduced to their root form) within a context of all terms from the first and the second document. 
In this approach, the computation can be divided into three consecutive steps: diff --git a/docs/data-provision/enrichment-by-mining/img.png b/docs/data-provision/enrichment-by-mining/img.png new file mode 100644 index 0000000..d77d197 Binary files /dev/null and b/docs/data-provision/enrichment-by-mining/img.png differ diff --git a/docs/data-provision/enrichment/metadata_extraction.md b/docs/data-provision/enrichment-by-mining/metadata_extraction.md similarity index 94% rename from docs/data-provision/enrichment/metadata_extraction.md rename to docs/data-provision/enrichment-by-mining/metadata_extraction.md index ef930bd..4ade667 100644 --- a/docs/data-provision/enrichment/metadata_extraction.md +++ b/docs/data-provision/enrichment-by-mining/metadata_extraction.md @@ -1,7 +1,6 @@ # Metadata extraction -***Short description:*** -Metadata Extraction algorithm is responsible for plaintext and metadata extraction out of the PDF documents. It based on [CERMINE](http://cermine.ceon.pl/about.html) project. +***Short description:*** Metadata Extraction algorithm is responsible for plaintext and metadata extraction out of the PDF documents. It based on [CERMINE](http://cermine.ceon.pl/about.html) project. CERMINE is a comprehensive open source system for extracting metadata and content from scientific articles in born-digital form. The system is able to process documents in PDF format and extracts: diff --git a/docs/data-provision/enrichment/enrichment.md b/docs/data-provision/enrichment/enrichment.md deleted file mode 100644 index 8ff8432..0000000 --- a/docs/data-provision/enrichment/enrichment.md +++ /dev/null @@ -1,44 +0,0 @@ -# Enrichment - -## Mining - -The OpenAIRE Graph is enriched by links mined by OpenAIRE’s full-text mining algorithms that scan the plaintexts of publications for funding information, references to datasets, software URIs, accession numbers of bioetities, and EPO patent mentions. 
Custom mining modules also link research objects to specific research communities, initiatives and infrastructures. In addition, other inference modules provide content-based document classification, document similarity, citation matching, and author affiliation matching. - -**Project mining** in OpenAIRE text mines the full-texts of publications in order to extract matches to funding project codes/IDs. The mining algorithm works by utilising (i) the grant identifier, and (ii) the project acronym (if available) of each project. The mining algorithm: (1) Preprocesses/normalizes the full-texts using several functions, which depend on the characteristics of each funder (i.e., the format of the grant identifiers), such as stopword and/or punctuation removal, tokenization, stemming, converting to lowercase; then (2) String matching of grant identifiers against the normalized text is done using database techniques; and (3) The results are validated and cleaned using the context near the match by looking at the context around the matched ID for relevant metadata and positive or negative words/phrases, in order to calculate a confidence value for each publication-->project link. A confidence threshold is set to optimise high accuracy while minimising false positives, such as matches with page or report numbers, post/zip codes, parts of telephone numbers, DOIs or URLs, accession numbers. The algorithm also applies rules for disambiguating results, as different funders can share identical project IDs; for example, grant number 633172 could refer to H2020 project EuroMix but also to Australian-funded NHMRC project “Brain activity (EEG) analysis and brain imaging techniques to measure the neurobiological effects of sleep apnea”. Project mining works very well and was the first Text & Data Mining (TDM) service of OpenAIRE. Performance results vary from funder to funder but precision is higher than 98% for all funders and 99.5% for EC projects. 
Recall is higher than 95% (99% for EC projects), when projects are properly acknowledged using project/grant IDs. - -**Dataset extraction** runs on publications full-texts as described in “High pass text-filtering for Citation matching”, TPDL 2017[1]. In particular, we search for citations to datasets using their DOIs, titles and other metadata (i.e., dates, creator names, publishers, etc.). We extract parts of the text which look like citations and search for datasets using database join and pattern matching techniques. Based on the experiments described in the paper, precision of the dataset extraction module is 98.5% and recall is 97.4% but it is also probably overestimated since it does not take into account corruptions that may take place during pdf to text extraction. It is calculated on the extracted full-texts of small samples from PubMed and arXiv. - -**Software extraction** runs also on parts of the text which look like citations. We search the citations for links to software in open software repositories, specifically github, sourceforge, bitbucket and the google code archive. After that, we search for links that are included in Software Heritage (SH, https://www.softwareheritage.org) and return the permanent URL that SH provides for each software project. We also enrich this content with user names, titles and descriptions of the software projects using web mining techniques. Since software mining is based on URL matching, our precision is 100% (we return a software link only if we find it in the text and there is no need to disambiguate). As for recall rate, this is not calculable for this mining task. Although we apply all the necessary normalizations to the URLs in order to overcome usual issues (e.g., http or https, existence of www or not, lower/upper case), we do not calculate cases where a software is mentioned using its name and not by a link from the supported software repositories. 
- -**For the extraction of bio-entities**, we focus on Protein Data Bank (PDB) entries. We have downloaded the database with PDB codes and we update it regularly. We search through the whole publication’s full-text for references to PDB codes. We apply disambiguation rules (e.g., there are PDB codes that are the same as antibody codes or other issues) so that we return valid results. Current precision is 98%. Although it's risky to mention recall rates since these are usually overestimated, we have calculated a recall rate of 98% using small samples from pubmed publications. Moreover, our technique is able to identify about 30% more links to proteins than the ones that are tagged in Pubmed xmls. - -**Other text-mining modules** include mining for links to EPO patents, or custom mining modules for linking research objects to specific research communities, initiatives and infrastructures, e.g. COVID-19 mining module. Apart from text-mining modules, OpenAIRE also provides a document classification service that employs analysis of free text stemming from the abstracts of the publications. The purpose of applying a document classification module is to assign a scientific text one or more predefined content classes. In OpenAIRE, the currently used taxonomies are arXiv, MeSH (Medical Subject Headings), ACM and DDC (Dewey Decimal Classification, or Dewey Decimal System). - -## Bulk Tagging/Deduction - -The Deduction process (also known as “bulk tagging”) enriches each record with new information that can be derived from the existing property values. 
- -As of September 2020, three procedures are in place to relate a research product to a research initiative, infrastructure (RI) or community (RC) based on: - -* subjects (2.7M results tagged) - -* Zenodo community (16K results tagged) - -* the data source it comes from (250K results tagged) - -The list of subjects, Zenodo communities and data sources used to enrich the products are defined by the managers of the community gateway or infrastructure monitoring dashboard associated with the RC/RI. - -## Propagation - -This process “propagates” properties and links from one product to another if between the two there is a “strong” semantic relationship. - -As of September 2020, the following procedures are in place: -Propagation of the property “country” to results from institutional repositories: e.g. publication collected from an institutional repository maintained by an italian university will be enriched with the property “country = IT”. - -* Propagation of links to projects: e.g. publication linked to project P “is supplemented by” a dataset D. Dataset D will get the link to project P. The relationships considered for this procedure are “isSupplementedBy” and “supplements”. - -* Propagation of related community/infrastructure/initiative from organizations to products via affiliation relationships: e.g. a publication with an author affiliated with organization O. The manager of the community gateway C declared that the outputs of O are all relevant for his/her community C. The publication is tagged as relevant for C. - -* Propagation of related community/infrastructure/initiative to related products: e.g. publication associated to community C is supplemented by a dataset D. Dataset D will get the association to C. The relationships considered for this procedure are “isSupplementedBy” and “supplements”. - -* Propagation of ORCID identifiers to related products, if the products have the same authors: e.g. 
publication has ORCID for its authors and is supplemented by a dataset D. Dataset D has the same authors as the publication. Authors of D are enriched with the ORCIDs available in the publication. The relationships considered for this procedure are “isSupplementedBy” and “supplements”. \ No newline at end of file diff --git a/docs/data-provision/finalisation.md b/docs/data-provision/finalisation.md new file mode 100644 index 0000000..92a875c --- /dev/null +++ b/docs/data-provision/finalisation.md @@ -0,0 +1,18 @@ +# Finalisation + +At the very end of the graph production workflow, a step is dedicated to performing certain finalisation operations, described on this page, +aiming to improve the overall quality of the data. +The output of this final step is the final version of the OpenAIRE Research Graph. + +## Filtering + +Bibliographic records that do not meet minimal requirements for being part of the OpenAIRE Research Graph are eliminated during this phase. +Currently, the only criterion applied horizontally to the entire graph aims at excluding scientific results whose title is not meaningful for citation purposes. +Then, different criteria are applied in the pre-processing of specific sub-collections: + +* [Crossref filtering](/data-provision/aggregation/non-compatible-sources/doiboost#crossref-filtering) + +## Country cleaning + +This phase is responsible for removing the country information from result records that match specific criteria. The need for this phase is driven by the fact that some datasources, although referred to as being of national pertinence, contain material that is not always related to the given country. 
+ diff --git a/docs/data-provision/indexing.md b/docs/data-provision/indexing.md index f5b2e21..579c230 100644 --- a/docs/data-provision/indexing.md +++ b/docs/data-provision/indexing.md @@ -1,20 +1,16 @@ ---- -sidebar_position: 5 ---- - # Indexing -The final version of the OpenAIRE Graph is indexed on a Solr server that is used by the OpenAIRE portals (EXPLORE, CONNECT, PROVIDE) and APIs, the latter adopted by several third-party applications and organizations, such as: +The final version of the OpenAIRE Research Graph is indexed on a Solr server that is used by the OpenAIRE portals ([EXPLORE](https://explore.openaire.eu), [CONNECT](https://connect.openaire.eu), [PROVIDE](https://provide.openaire.eu)) and APIs, the latter adopted by several third-party applications and organizations, such as: * The OpenAIRE Graph APIs and Portals will offer to the EOSC (European Open Science Cloud) an Open Science Resource Catalogue, keeping an up to date map of all research results (publications, datasets, software), services, organizations, projects, funders in Europe and beyond. * DSpace & EPrints repositories can install the OpenAIRE plugin to expose OpenAIRE compliant metadata records via their OAI-PMH endpoint and offer to researchers the possibility to link their depositions to the funding project, by selecting it from the list of project provided by OpenAIRE. -* EC participant portal (Sygma - System for Grant Management) uses the OpenAIRE API in the “Continuous Reporting” section. Sygma automatically fetches from the OpenAIRE Search API the list of publications and datasets in the OpenAIRE Graph that are linked to the project. The user can select the research products from the list and easily compile the continuous reporting data of the project. +* EC participant portal (Sygma - System for Grant Management) uses the OpenAIRE API in the “Continuous Reporting” section. 
Sygma automatically fetches from the OpenAIRE Search API the list of publications and datasets in the OpenAIRE Research Graph that are linked to the project. The user can select the research products from the list and easily compile the continuous reporting data of the project. * ScholExplorer is used by different players of the scholarly communication ecosystem. For example, [Elsevier](https://www.elsevier.com/authors/tools-and-resources/research-data/data-base-linking) uses its API to make the links between publications and datasets automatically appear on ScienceDirect. -ScholExplorer indexes the links among the four major types of research products (API v3) available in the OpenAIRE Graph and makes them available through an HTTP API that allows +ScholExplorer indexes the links among the four major types of research products (API v3) available in the OpenAIRE Research Graph and makes them available through an HTTP API that allows to search them by the following criteria: * Links whose source object has a given PID or PID type; * Links whose source object has been published by a given data source ("data source as publisher"); diff --git a/docs/data-provision/enrichment/impact-scores.md b/docs/data-provision/indicators-ingestion/impact-scores.md similarity index 99% rename from docs/data-provision/enrichment/impact-scores.md rename to docs/data-provision/indicators-ingestion/impact-scores.md index d3db939..7754a04 100644 --- a/docs/data-provision/enrichment/impact-scores.md +++ b/docs/data-provision/indicators-ingestion/impact-scores.md @@ -1,7 +1,3 @@ ---- -sidebar_position: 2 ---- - # Impact indicators This page summarises all calculated impact indicators, which are included into the [measure](/data-model/entities/other#measure) property. 
diff --git a/docs/data-provision/indicators-ingestion/usage-counts.md b/docs/data-provision/indicators-ingestion/usage-counts.md new file mode 100644 index 0000000..fd98a43 --- /dev/null +++ b/docs/data-provision/indicators-ingestion/usage-counts.md @@ -0,0 +1,7 @@ +# Usage Statistics Indicators + +Usage Statistics indicators for research products, like publications, datasets, etc., are an important complement to other (traditional and alternative) bibliometric indicators to provide a comprehensive and recent view of the impact of such resources but also about their authors, institutions and the platforms themselves. They take into account different levels of information: the usage of data sources, the usage of individual items in the context of their resource type and the usage of individual web resources or files. + +Usage Statistics Indicators are built by OpenAIRE's UsageCounts Service. The service collects usage data and consolidated usage statistics reports respectively, from its distributed network of data providers (repositories, e-journals, CRIS) by utilizing open standards and protocols and delivers reliable, consolidated and comparable usage metrics like counts of item downloads and metadata views conformant to the COUNTER Code of Practice. + +You can find more information about the UsageCounts service [here](https://usagecounts.openaire.eu/). \ No newline at end of file diff --git a/docs/data-provision/merge-by-id.md b/docs/data-provision/merge-by-id.md new file mode 100644 index 0000000..199500f --- /dev/null +++ b/docs/data-provision/merge-by-id.md @@ -0,0 +1,28 @@ +# Merge by id + +In the metadata aggregation system it is common to find the same record provided by +different datasources and, sometimes, even inside the same datasource (especially in +case of aggregators). As the harmonisation processes are performed per datasource +contents, the relative records are the output of different mapping implementations. 
+This approach has the advantage of being deeply customisable to catch datasource specific +aspects, but it leaves room for inconsistencies when evaluating the different mappings +across the various datasources. + +This phase is therefore responsible for compensating for such inconsistencies and performs +a global grouping of every record available in the graph: + +- entities are grouped by [`id`](../data-model/entities/result#id) +- relations are grouped by [`source`, `target`, `reltype`](../data-model/relationships#the-relationship-object) + +This ensures that the same record, possibly assigned to different types by different +mappings, appears only once in the graph and under a single typing. In case of clashing +identifiers, the properties are merged (including the provenance information), considering +the following precedence order for the result typing: + +``` +publication > dataset > software > other +``` + +The same holds for relationships: as the same (e.g.) DOI-to-DOI citation relation could +be aggregated from multiple sources, this grouping phase collapses all the different +duplicates onto a single relation that nevertheless includes all the individual provenances. diff --git a/docs/data-provision/stats.md b/docs/data-provision/stats.md index 6f1cb17..9d0de86 100644 --- a/docs/data-provision/stats.md +++ b/docs/data-provision/stats.md @@ -1,7 +1,12 @@ ---- -sidebar_position: 6 ---- - # Stats analysis -The OpenAIRE Graph is also processed by a pipeline for extracting the statistics and producing the charts for funders, research initiative, infrastructures, and policy makers that you can see on MONITOR. 
Based on the information available on the graph, OpenAIRE provides a set of indicators for monitoring the funding and research impact and the uptake of Open Science publishing practices, such as Open Access publishing of publications and datasets, availability of interlinks between research products, availability of post-print versions in institutional or thematic Open Access repositories, etc. \ No newline at end of file +The OpenAIRE Graph is also processed by a pipeline for extracting the statistics +and producing the charts for funders, research initiatives, research infrastructures, +and policymakers available on [MONITOR](https://monitor.openaire.eu). + +Based on the information available on the graph, OpenAIRE provides a set of +indicators for monitoring the funding and research impact and the uptake of +Open Science publishing practices, such as Open Access publishing of publications +and datasets, availability of interlinks between research products, availability +of post-print versions in institutional or thematic Open Access repositories, etc. + diff --git a/docs/download.md b/docs/download.md deleted file mode 100644 index 6a6e6a8..0000000 --- a/docs/download.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -sidebar_position: 4 ---- - -# Bulk downloads - -In order to facilitate users, different dumps are available. All are available under the Zenodo community called [OpenAIRE Research Graph](https://zenodo.org/communities/openaire-research-graph). -Here we provide detailed documentation about the full dump: - -* JSON dump: https://doi.org/10.5281/zenodo.3516917 -* JSON schema: https://doi.org/10.5281/zenodo.4238938 - -:::note Tip! - -For a visual and interactive overview of the JSON schema, we suggest to use a JSON schema viewer like [jsonschemaviewer](https://navneethg.github.io/jsonschemaviewer/) (you just need to copy the schema and then you can easily navigate through the nodes). 
- -::: diff --git a/docs/downloads/alternative-model/cfhb.md b/docs/downloads/alternative-model/cfhb.md new file mode 100644 index 0000000..db13233 --- /dev/null +++ b/docs/downloads/alternative-model/cfhb.md @@ -0,0 +1,30 @@ +--- + +sidebar_position: 1 + +--- + +# CfHbKeyValue + +Information about the sources from which the record has been collected. + + + @JsonSchema(description = "the OpenAIRE identifier of the data source") +### key +_Type: String • Cardinality: ONE_ + +the OpenAIRE identifier of the data source + +```json +"key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2" +``` + +### value +_Type: String • Cardinality: ONE_ + +The name of the data source. + +```json +"value":"Crossref" +``` + diff --git a/docs/downloads/alternative-model/communityInstance.md b/docs/downloads/alternative-model/communityInstance.md new file mode 100644 index 0000000..0ec83ca --- /dev/null +++ b/docs/downloads/alternative-model/communityInstance.md @@ -0,0 +1,37 @@ +--- + +sidebar_position: 1 + +--- + +# CommunityInstance + +It is a subclass of [Instance](../../data-model/entities/result#instance) extended with information regarding the collection and hosting source for this materialization of the result. + +### hostedby +_Type: [CfHbKeyValue](./cfhb) • Cardinality: ONE_ + +Information about the source from which the instance can be viewed or downloaded. 
+ +```json + +"hostedby": { + "key": "10|issn___print::35ee75a5ad42581d604be113a8f56427", + "value": "New Phytologist" + }, + +``` + +### collectedfrom +_Type: [CfHbKeyValue](./cfhb) • Cardinality: ONE_ + +Information about the source from which the record has been collected + + +```json + +"collectedfrom": { + "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", + "value": "Crossref" + } +``` \ No newline at end of file diff --git a/docs/downloads/alternative-model/context.md b/docs/downloads/alternative-model/context.md new file mode 100644 index 0000000..e08ea69 --- /dev/null +++ b/docs/downloads/alternative-model/context.md @@ -0,0 +1,46 @@ +--- + +sidebar_position: 1 + +--- + +# Context + +Information related to research initiative/community (RI/RC) related to the result. + +### code +_Type: String • Cardinality: ONE_ + +Code identifying the RI/RC. + +```json +"code":"sdsn-gr" + +``` + + +### label +_Type: String • Cardinality: ONE_ + +Label of the RI/RC. + +```json +"label":"SDSN - Greece" +``` + +### provenance +_Type: [Provenance](/data-model/entities/other#provenance-2) • Cardinality: MANY_ + +Why this result is associated to the RI/RC. + +```json + +"provenance":[{ + "provenance":"Inferred by OpenAIRE", + "trust":"0.9" + }, + ... + ] + +``` + diff --git a/docs/downloads/alternative-model/extendedresult.md b/docs/downloads/alternative-model/extendedresult.md new file mode 100644 index 0000000..2284479 --- /dev/null +++ b/docs/downloads/alternative-model/extendedresult.md @@ -0,0 +1,141 @@ +--- + +sidebar_position: 1 + +--- + + +# Extended Result + + +It is a subclass of [Result](/data-model/entities/result) extended with information regarding projects (and funders), research communities/infrastructure and related data sources. + + + +### projects + +_Type: [Project](project.md) • Cardinality: MANY_ + + +List of projects (i.e. grants) that (co-)funded the production of the research results. 
+ + +```json + + +"projects": [ + { + "id": "40|corda__h2020::94c4a066401e22002c4811a301bb4655", + "code": "727929", + "acronym": "TomRes", + "title": "A NOVEL AND INTEGRATED APPROACH TO INCREASE MULTIPLE AND COMBINED STRESS TOLERANCE IN PLANTS USING TOMATO AS A MODEL", + "funder": { + "shortName": "EC", + "name": "European Commission", + "jurisdiction": "EU", + "fundingStream": "H2020" + }, + "provenance": { + "provenance": "Harvested", + "trust": "0.900000000000000022" + }, + "validated": { + "validationDate": "2021-0101", + "validatedByFunder": true + } + }, + ... + ] + +``` + +### context + +_Type: [Context](./context) • Cardinality: MANY_ + + +Reference to relevant research infrastructures, initiatives or communities (RI/RC) among those collaborating with OpenAIRE. Please see https://connect.openaire.eu for those that are publicly visible. + + +```json + + +"context":[ + { + "code":"sdsn-gr", + "label":"SDSN - Greece", + "provenance":[ + { + "provenance":"Inferred by OpenAIRE", + "trust":"0.9" + } + ] + }, + ... + ] + +``` + + + +### collectedfrom + +_Type: [CfHbKeyValue](./cfhb) • Cardinality: MANY_ + + +Information about the sources from which the record has been collected. + + +```json + +"collectedfrom":[ + { + "key":"10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", + "value":"Crossref" + }, + ... + ] + +``` + + +### instance + +_Type: [CommunityInstance](./communityInstance) • Cardinality: MANY_ + +Information about the source from which the instance can be viewed or downloaded. 
+ +```json + + +"instance": [ + { + "license": "http://doi.wiley.com/10.1002/tdm_license_1.1", + "accessright": { + "code": "c_16ec", + "label": "RESTRICTED", + "scheme": "http://vocabularies.coar-repositories.org/documentation/access_rights/", + "openAccessRoute": null + }, + "type": "Article", + "url": [ + "https://api.wiley.com/onlinelibrary/tdm/v1/articles/10.1111%2Fnph.15014", + "http://onlinelibrary.wiley.com/wol1/doi/10.1111/nph.15014/fullpdf", + "http://dx.doi.org/10.1111/nph.15014" + ], + "publicationdate": "2018-02-09", + "refereed": "UNKNOWN", + "hostedby": { + "key": "10|issn___print::35ee75a5ad42581d604be113a8f56427", + "value": "New Phytologist" + }, + "collectedfrom": { + "key": "10|openaire____::081b82f96300b6a6e3d282bad31cb6e2", + "value": "Crossref" + } + }, + ... + ] + + +``` diff --git a/docs/downloads/alternative-model/funder.md b/docs/downloads/alternative-model/funder.md new file mode 100644 index 0000000..1da93a9 --- /dev/null +++ b/docs/downloads/alternative-model/funder.md @@ -0,0 +1,72 @@ +--- + +sidebar_position: 1 + +--- + +# Funder + + +Information about the funder funding the project. + + +### fundingStream + +_Type: String • Cardinality: ONE_ + + +Funding information for the project. + + +```json + +"fundingStream": "H2020" + + +``` + +### jurisdiction + +_Type: String • Cardinality: ONE_ + + +Geographical jurisdiction (e.g. for European Commission is EU, for Croatian Science Foundation is HR). + + +```json + +"jurisdiction": "EU" + +``` + + +### name + +_Type: String • Cardinality: ONE_ + + +The name of the funder. + + +```json + +"name": "European Commission" + +``` + + +### shortName + +_Type: String • Cardinality: ONE_ + + +The short name of the funder. 
+ + +```json + +"shortName": "EC" + +``` + + diff --git a/docs/downloads/alternative-model/project.md b/docs/downloads/alternative-model/project.md new file mode 100644 index 0000000..774b487 --- /dev/null +++ b/docs/downloads/alternative-model/project.md @@ -0,0 +1,134 @@ +--- + +sidebar_position: 1 + +--- + + + +# Project + + +The information about the projects related to the result. + + +### id + +_Type: String • Cardinality: ONE_ + + +Main entity identifier, created according to the [OpenAIRE entity identifier and PID mapping policy](../../data-model/pids-and-identifiers). + + +```json + +"id": "40|corda__h2020::70ea22400fd890c5033cb31642c4ae68" + +``` + + +### code + +_Type: String • Cardinality: ONE_ + + +The grant agreement code of the project. + + +```json + +"code": "777541" + +``` + + +### acronym + +_Type: String • Cardinality: ONE_ + + +Project's acronym. + + +```json + +"acronym": "OpenAIRE-Advance" + +``` + + +### title + +_Type: String • Cardinality: ONE_ + + +Project's title. + + +```json + +"title": "OpenAIRE Advancing Open Scholarship" + +``` + + +### funder + +_Type: [Funder](funder.md) • Cardinality: ONE_ + + +Information about the funder funding the project. + + +```json + + +"funder": { + "shortName": "EC", + "name": "European Commission", + "jurisdiction": "EU", + "fundingStream": "H2020" + } + + +``` + +### provenance + + +_Type: [Provenance](../../data-model/entities/other#provenance-2) • Cardinality: ONE_ + + +The reason why the project is associated to the result. + + +```json + + +"provenance": { + "provenance": "Harvested", + "trust": "0.900000000000000022" + } + +``` + + +### validated + + +_Type: [Validated](validated.md) • Cardinality: ONE_ + + +Specifies if the association between the project and the result was validated. 
+ + +```json + + +"validated": { + "validationDate": "2021-0101", + "validatedByFunder": true + } + +``` + diff --git a/docs/downloads/alternative-model/validated.md b/docs/downloads/alternative-model/validated.md new file mode 100644 index 0000000..e92b2c9 --- /dev/null +++ b/docs/downloads/alternative-model/validated.md @@ -0,0 +1,41 @@ +--- + +sidebar_position: 1 + +--- + +# Validated + + +Information about the validation of the association between the result and the funding information. + + +### validationDate + +_Type: String • Cardinality: ONE_ + + +When OpenAIRE collected the association between the funding and the result from an authoritative source (i.e. Sygma). + + +```json + +"validationDate": "2021-0101" + +``` + + +### validatedByFunder + +_Type: Boolean • Cardinality: ONE_ + + +Specifies if the validation comes from the funder. + + +```json + + +"validatedByFunder": true + +``` \ No newline at end of file diff --git a/docs/downloads/beginners-kit.md b/docs/downloads/beginners-kit.md new file mode 100644 index 0000000..5bb8548 --- /dev/null +++ b/docs/downloads/beginners-kit.md @@ -0,0 +1,6 @@ +--- +sidebar_position: 2 +--- + +# Beginners kit + diff --git a/docs/downloads/full-graph.md b/docs/downloads/full-graph.md new file mode 100644 index 0000000..9277dbe --- /dev/null +++ b/docs/downloads/full-graph.md @@ -0,0 +1,48 @@ +--- +sidebar_position: 1 +--- + +# Full graph dump + +You can download the full OpenAIRE Research Graph Dump as well as its schema from the following links: + + Dataset: https://doi.org/10.5281/zenodo.3516917 + + Schema: https://doi.org/10.5281/zenodo.4238938 + +The schema used to dump this dataset mirrors the one described in the [Data Model](../data-model). +This dataset is licensed under a Creative Commons Attribution 4.0 International License. +It is composed of several files so that you can download the parts you are interested in. The files are named after the entity they store (e.g. publication, dataset). 
Each file is at most 10GB and it is +a tar archive containing gz files, each with one json per line. + +## How to acknowledge this work + +Open Science services are open and transparent and survive thanks to your active support and to the visibility and reward they gather. If you use one of the [OpenAIRE Research Graph dumps](https://doi.org/10.5281/zenodo.3516917) for your research, please provide a proper citation following the recommendation that you find on the dump's Zenodo page or as provided below. + +:::note How to cite + +Manghi P., Atzori C., Bardi A., Baglioni M., Schirrwagen J., Dimitropoulos H., La Bruzzo S., Foufoulas I., Mannocci A., Horst M., Czerniak A., Kiatropoulou K., Kokogiannaki A., De Bonis M., Artini M., Ottonello E., Lempesis A., Ioannidis A., Manola N., Principe P. (2022). "OpenAIRE Research Graph Dump", *Dataset*, Zenodo. [doi:10.5281/zenodo.3516917](https://doi.org/10.5281/zenodo.3516917) ([BibTex](/bibtex/OpenAIRE_Research_Graph_dump.bib)) +::: + +Please also consider citing [other relevant research products](/publications#relevant-research-products) that can be of interest. + +Also consider adding one of the following badges to your service with the appropriate link to [our website](https://graph.openaire.eu); click on the badges below to download the respective badge image files. + + + diff --git a/docs/downloads/related-datasets.md b/docs/downloads/related-datasets.md new file mode 100644 index 0000000..b342307 --- /dev/null +++ b/docs/downloads/related-datasets.md @@ -0,0 +1,30 @@ +--- +sidebar_position: 4 +--- + +# Other related datasets + +In this page, we list other related datasets; please refer to their respective schema definitions for the data model they follow. 
+ +## The dump of ScholeXplorer + + Dataset: https://doi.org/10.5281/zenodo.6338616 + + Schema (Scholix version 3): https://doi.org/10.5281/zenodo.1120275 + + Schema (Scholix version 4): https://doi.org/10.5281/zenodo.6351557 + +This dataset is licensed under a CC0 1.0 Universal (CC0 1.0) Public Domain Dedication. +The dataset contains the GZ-compressed dump of the Scholix links exposed by the OpenAIRE ScholeXplorer service. + +## The OpenAIRE LOD dump + +Dataset (RDF dump): https://doi.org/10.5281/zenodo.609943 + +LOD Ontology: http://lod.openaire.eu/vocab + +SPARQL Endpoint: http://lod.openaire.eu/sparql + + +The OpenAIRE Linked Open Data (LOD) Services and their integration with the OpenAIRE information space have been released as a beta version. The LOD exporting process started with a specification of the OpenAIRE data model as an RDF vocabulary, and then mapping of the OpenAIRE data to the graph-based RDF data model. To interlink the OpenAIRE data with related data on the Web, we have identified a list of potential datasets to be interlinked with, including the DBpedia dataset extracted from Wikipedia and the publication databases DBLP and CiteSeer. +Please refer [here](http://lod.openaire.eu/documentation) for more details on the LOD documentation. \ No newline at end of file diff --git a/docs/downloads/subgraphs.md b/docs/downloads/subgraphs.md new file mode 100644 index 0000000..af85628 --- /dev/null +++ b/docs/downloads/subgraphs.md @@ -0,0 +1,68 @@ +--- +sidebar_position: 3 +--- + +# Sub-graph dumps + +In order to facilitate users, different dumps are available under the Zenodo community called [OpenAIRE Research Graph](https://zenodo.org/communities/openaire-research-graph). +This page lists all alternative dumps currently available. 
+ + +## The OpenAIRE COVID-19 dump + + Dataset: https://doi.org/10.5281/zenodo.6638745 + + Schema: https://doi.org/10.5281/zenodo.6372977 + + This dataset is licensed under a Creative Commons Attribution 4.0 International License. + It contains metadata records of publications, research data, software and projects on the topic of Corona Virus and COVID-19. +This dump is part of the activities of OpenAIRE to support the fight against COVID-19 together with the OpenAIRE COVID-19 Gateway. +The dump consists of a tar archive containing gzip files with one json per line. Please refer [here](#alternative-sub-graph-data-model) for details on the data model of this dump. + +## The dump of funded products + + Dataset: https://doi.org/10.5281/zenodo.6634431 + + Schema: https://doi.org/10.5281/zenodo.6372977 + + This dataset is licensed under a Creative Commons Attribution 4.0 International License. +It contains metadata records of research products (research literature, data, software, other types of research products) with funding +information available in the OpenAIRE Research Graph. Records are grouped by funder in a dedicated archive file. Each tar archive contains +gzip files, each with one json record per line. The model of this dump differs from the one of the whole graph. +Please refer [here](#alternative-sub-graph-data-model) for details on the data model of this dump. + +## The dump of delta projects + + Dataset: https://doi.org/10.5281/zenodo.7119633 + + Schema: https://doi.org/10.5281/zenodo.4238938 + + This dataset is licensed under a Creative Commons Attribution 4.0 International License. + It contains the metadata records of projects collected by OpenAIRE in a given time frame. Usually one deposition of collected projects is done for each release of the OpenAIRE Research Graph. + The deposition is one tar archive containing gzip files, each with one json record per line. 
+ +## The dumps about research communities, initiatives and infrastructures + + Dataset: https://doi.org/10.5281/zenodo.6638478 + + Schema: https://doi.org/10.5281/zenodo.6372977 + + This dataset is licensed under a Creative Commons Attribution 4.0 International License. +The dataset contains one file per community/initiative/infrastructure collaborating with OpenAIRE. Check out also their community gateways on + CONNECT. Each file is a tar archive containing gzip files with one json per line. The only communities/research initiatives/infrastructures we dump are those visible to everyone. + The model of this dump differs from the one of the whole graph. +Please refer [here](#alternative-sub-graph-data-model) for details on the data model of this dump. + + --- + + ## Alternative sub-graph data model + + It should be noted that the dumps for research communities, infrastructures, and products related to projects do not strictly follow the main data model of the OpenAIRE Research Graph. In particular, they differ in the following: + + * only research products are dumped (no relations, and no entities other than results) + * the dumped results are extended with information that can be inferred from the whole dump, namely: + * funding information if present + * associated research community/infrastructure + * associated data sources + +So they have just one entity type, that is the [Extended Result](alternative-model/extendedresult.md). diff --git a/docs/intro.md b/docs/intro.md index feaaca3..95b1b40 100644 --- a/docs/intro.md +++ b/docs/intro.md @@ -6,12 +6,12 @@ sidebar_position: 1 # Overview -The OpenAIRE Graph is one of the largest open scholarly record collections worldwide, key in fostering Open Science and establishing its practices in the daily research activities. +The OpenAIRE Research Graph is one of the largest open scholarly record collections worldwide, key in fostering Open Science and establishing its practices in the daily research activities. 
Conceived as a public and transparent good, populated out of data sources trusted by scientists, the Graph aims at bringing discovery, monitoring, and assessment of science back in the hands of the scientific community. Imagine a vast collection of research products all linked together, contextualised and openly available. For the past years OpenAIRE has been working to gather this valuable record. It is a massive collection of metadata and links between scientific products such as articles, datasets, software, and other research products, entities like organisations, funders, funding streams, projects, communities, and data sources. -As of today, the OpenAIRE Graph aggregates hundreds of millions of metadata records (and links among them) from multiple data sources trusted by scientists, including: +As of today, the OpenAIRE Research Graph aggregates hundreds of millions of metadata records (and links among them) from multiple data sources trusted by scientists, including: * Repositories registered in OpenDOAR or re3data.org (soon FAIRSharing.org) * Open Access journals registered in DOAJ diff --git a/docs/license.md b/docs/license.md index 86ddff5..e9d730c 100644 --- a/docs/license.md +++ b/docs/license.md @@ -4,5 +4,5 @@ sidebar_position: 11 # License -OpenAIRE Graph is available for download and re-use as CC-BY (due to some input sources whose license is CC-BY). Parts of the graphs can be re-used as CC-0. +OpenAIRE Research Graph is available for download and re-use as CC-BY (due to some input sources whose license is CC-BY). Parts of the graphs can be re-used as CC-0. diff --git a/docs/publications.md b/docs/publications.md index 7c6495f..0324d36 100644 --- a/docs/publications.md +++ b/docs/publications.md @@ -2,70 +2,78 @@ sidebar_position: 7 --- -# How to cite +# Relevant publications -Open Science services are open and transparent and survive thanks to your active support and to the visibility and reward they gather. 
If you use one of the [OpenAIRE Graph dumps](https://zenodo.org/record/6616871) for your research, please provide a proper citation following the recommendation that you find on the dump's Zenodo page. +Open Science services are open and transparent and survive thanks to your active support and to the visibility and reward they gather. If you use one of the [OpenAIRE Research Graph dumps](https://doi.org/10.5281/zenodo.3516917) for your research, please provide a proper citation following the recommendation that you find on the dump's Zenodo page or as provided below. -## Relevant research products +:::note How to cite + +Manghi P., Atzori C., Bardi A., Baglioni M., Schirrwagen J., Dimitropoulos H., La Bruzzo S., Foufoulas I., Mannocci A., Horst M., Czerniak A., Kiatropoulou K., Kokogiannaki A., De Bonis M., Artini M., Ottonello E., Lempesis A., Ioannidis A., Manola N., Principe P. (2022). "OpenAIRE Research Graph Dump", *Dataset*, Zenodo. [doi:10.5281/zenodo.3516917](https://doi.org/10.5281/zenodo.3516917) ([BibTex](/bibtex/OpenAIRE_Research_Graph_dump.bib)) +::: + +## Other relevant research products + +Please also consider citing the related research products listed below. ### Aggregation system -Manghi, P., Artini, M., Atzori, C., Bardi, A., Mannocci, A., La Bruzzo, S., Candela, L., Castelli, D. and Pagano, P. (2014), “The D-NET software toolkit: A framework for the realization, maintenance, and operation of aggregative infrastructures”, Program: electronic library and information systems, Vol. 48 No. 4, pp. 322-354. [doi:10.1108/prog-08-2013-0045](http://doi.org/10.1108/prog-08-2013-0045) +Manghi P., Artini M., Atzori C., Bardi A., Mannocci A., La Bruzzo S., Candela L., Castelli D., Pagano P. (2014). "The D-NET software toolkit: A framework for the realization, maintenance, and operation of aggregative infrastructures", Program: electronic library and information systems, Vol. 48 No. 4, pp. 322-354. 
[doi:10.1108/prog-08-2013-0045](http://doi.org/10.1108/prog-08-2013-0045) -Atzori, C., Bardi, A., Manghi, P., & Mannocci, A. (2017, January). "The OpenAIRE workflows for data management". In Italian Research Conference on Digital Libraries (pp. 95-107). Springer, Cham. [doi:10.1007/978-3-319-68130-6_8](https://doi.org/10.1007/978-3-319-68130-6_8) +Atzori C., Bardi A., Manghi P., Mannocci A. (2017). "The OpenAIRE workflows for data management", In Italian Research Conference on Digital Libraries (IRCDL), pp. 95-107, Springer, Cham. [doi:10.1007/978-3-319-68130-6_8](https://doi.org/10.1007/978-3-319-68130-6_8) -*Software* Michele Artini, Claudio Atzori, Alessia Bardi, Sandro La Bruzzo, Paolo Manghi, & Andrea Mannocci. (2016, November 24). "The D-NET software toolkit: dnet-basic-aggregator (Version 1.3.0)". Zenodo. [doi:10.5281/zenodo.168356](https://doi.org/10.5281/zenodo.168356) +Artini M., Atzori C., Bardi A., La Bruzzo S., Manghi P., Mannocci A. (2016). "The D-NET software toolkit: dnet-basic-aggregator (Version 1.3.0)". *Software*, Zenodo. [doi:10.5281/zenodo.168356](https://doi.org/10.5281/zenodo.168356) -Mannocci, A., & Manghi, P. (2016, September). "DataQ: a data flow quality monitoring system for aggregative data infrastructures". In International Conference on Theory and Practice of Digital Libraries (pp. 357-369). Springer, Cham. [doi:10.1007/978-3-319-43997-6_28](https://doi.org/10.1007/978-3-319-43997-6_28) +Mannocci A., Manghi P. (2016). "DataQ: a data flow quality monitoring system for aggregative data infrastructures", International Conference on Theory and Practice of Digital Libraries (TPDL), pp. 357-369, Springer, Cham. [doi:10.1007/978-3-319-43997-6_28](https://doi.org/10.1007/978-3-319-43997-6_28) ### Deduplication -Vichos K., De Bonis M., Kanellos I., Chatzopoulos S., Atzori C., Manola N., Manghi P., Vergoulis T. (Feb. 2022), "A preliminary assessment of the article deduplication algorithm used for the OpenAIRE Graph". 
IRCDL 2022 - 18th Italian Research Conference on Digital Libraries, Padua, Italy. CEUR-WS Proceedings. [http://ceur-ws.org/Vol-3160](http://ceur-ws.org/Vol-3160/) +Vichos K., De Bonis M., Kanellos I., Chatzopoulos S., Atzori C., Manola N., Manghi P., Vergoulis T. (2022). "A preliminary assessment of the article deduplication algorithm used for the OpenAIRE Research Graph", In Italian Research Conference on Digital Libraries (IRCDL), Padua, Italy, CEUR-WS Proceedings. [http://ceur-ws.org/Vol-3160](http://ceur-ws.org/Vol-3160/) -De Bonis, M., Manghi, P., & Atzori, C. (2022). "FDup: a framework for general-purpose and efficient entity deduplication of record collections". PeerJ Computer Science, 8, e1058. [https://peerj.com/articles/cs-1058](https://peerj.com/articles/cs-1058) +De Bonis M., Manghi P., Atzori C. (2022). "FDup: a framework for general-purpose and efficient entity deduplication of record collections", PeerJ Computer Science, 8, e1058. [https://peerj.com/articles/cs-1058](https://peerj.com/articles/cs-1058) -Manghi, P., Atzori, C., De Bonis, M., & Bardi, A. (2020). "Entity deduplication in big data graphs for scholarly communication". Data Technologies and Applications. [doi:10.1108/dta-09-2019-0163](https://doi.org/10.1108/dta-09-2019-0163) +Manghi P., Atzori C., De Bonis M., Bardi A. (2020). "Entity deduplication in big data graphs for scholarly communication", Data Technologies and Applications. [doi:10.1108/dta-09-2019-0163](https://doi.org/10.1108/dta-09-2019-0163) -Atzori, C., Manghi, P., & Bardi, A. (2018, December). "GDup: de-duplication of scholarly communication big graphs". In 2018 IEEE/ACM 5th International Conference on Big Data Computing Applications and Technologies (BDCAT) (pp. 142-151). IEEE. [doi:10.1109/bdcat.2018.00025](https://doi.org/10.1109/bdcat.2018.00025) +Atzori C., Manghi P., Bardi A. (2018). 
"GDup: de-duplication of scholarly communication big graphs", In 2018 IEEE/ACM 5th International Conference on Big Data Computing Applications and Technologies (BDCAT) (pp. 142-151). IEEE. [doi:10.1109/bdcat.2018.00025](https://doi.org/10.1109/bdcat.2018.00025) -*Software* Claudio Atzori, & Paolo Manghi. (2017, February 17). "GDup: a big graph entity deduplication system" (Version 4.0.5). Zenodo. [doi:/10.5281/zenodo.292980](https://doi.org/10.5281/zenodo.292980) +Atzori C., Manghi P. (2017). "GDup: a big graph entity deduplication system" (Version 4.0.5), *Software*, Zenodo. [doi:10.5281/zenodo.292980](https://doi.org/10.5281/zenodo.292980) -Atzori, Claudio. "GDup: an Integrated, Scalable Big Graph Deduplication System." (2016). [doi:10.5281/zenodo.1454879](https://doi.org/10.5281/zenodo.1454879) +Atzori C. (2016). "GDup: an Integrated, Scalable Big Graph Deduplication System". [doi:10.5281/zenodo.1454879](https://doi.org/10.5281/zenodo.1454879) -Manghi, Paolo, Marko Mikulicic, and Claudio Atzori. "De-duplication of aggregation authority files." International Journal of Metadata, Semantics and Ontologies 7.2 (2012): 114-130. [doi:10.1504/ijmso.2012.050014](https://doi.org/10.1504/ijmso.2012.050014) +Manghi P., Mikulicic M., Atzori C. (2012). "De-duplication of aggregation authority files." International Journal of Metadata, Semantics and Ontologies 7.2: 114-130. [doi:10.1504/ijmso.2012.050014](https://doi.org/10.1504/ijmso.2012.050014) -Manghi, P., & Mikulicic, M. (2011, October). "PACE: A general-purpose tool for authority control". In Research Conference on Metadata and Semantic Research (pp. 80-92). Springer, Berlin, Heidelberg. [doi:10.1007/978-3-642-24731-6_8](https://doi.org/10.1007/978-3-642-24731-6_8) +Manghi P., Mikulicic M. (2011). "PACE: A general-purpose tool for authority control", In Research Conference on Metadata and Semantic Research, pp. 80-92, Springer, Berlin, Heidelberg. 
[doi:10.1007/978-3-642-24731-6_8](https://doi.org/10.1007/978-3-642-24731-6_8) ### Mining -Giannakopoulos T., Foufoulas Y., Dimitropoulos H., Manola N. (2019) “Interactive Text Analysis and Information Extraction”. In: Manghi P., Candela L., Silvello G. (eds) Digital Libraries: Supporting Open Science. IRCDL 2019. Communications in Computer and Information Science, vol 988. Springer, Cham. [doi:10.1007/978-3-030-11226-4_27](https://doi.org/10.1007/978-3-030-11226-4_27) +Giannakopoulos T., Foufoulas Y., Dimitropoulos H., Manola N. (2019). "Interactive Text Analysis and Information Extraction", In Italian Research Conference on Digital Libraries (IRCDL), vol 988. Springer, Cham. [doi:10.1007/978-3-030-11226-4_27](https://doi.org/10.1007/978-3-030-11226-4_27) -Foufoulas Y., Stamatogiannakis L., Dimitropoulos H., Ioannidis Y. (2017) “High-Pass Text Filtering for Citation Matching”. In: Kamps J., Tsakonas G., Manolopoulos Y., Iliadis L., Karydis I. (eds) Research and Advanced Technology for Digital Libraries. TPDL 2017. Lecture Notes in Computer Science, vol 10450. Springer, Cham. [doi:10.1007/978-3-319-67008-9_28](https://doi.org/10.1007/978-3-319-67008-9_28) +Foufoulas Y., Stamatogiannakis L., Dimitropoulos H., Ioannidis Y. (2017). "High-Pass Text Filtering for Citation Matching", In International Conference on Theory and Practice of Digital Libraries (TPDL). Springer, Cham. [doi:10.1007/978-3-319-67008-9_28](https://doi.org/10.1007/978-3-319-67008-9_28) -Y. Chronis, Y. Foufoulas, V. Nikolopoulos, A. Papadopoulos, L. Stamatogiannakis, C. Svingos, Y. E. Ioannidis, "A Relational Approach to Complex Dataflows", in Workshop Proceedings of the EDBT/ICDT 2016 (MEDAL 2016) Joint Conference (March 15, 2016, Bordeaux, France) on CEUR-WS.org (ISSN 1613-0073) [http://ceur-ws.org/Vol-1558/paper45.pdf](http://ceur-ws.org/Vol-1558/paper45.pdf) +Chronis Y., Foufoulas Y., Nikolopoulos V., Papadopoulos A., Stamatogiannakis L., Svingos C., Ioannidis Y. E. (2016). 
"A Relational Approach to Complex Dataflows", In Workshop Proceedings of the EDBT/ICDT 2016 (MEDAL 2016) Joint Conference on CEUR-WS.org (ISSN 1613-0073) [http://ceur-ws.org/Vol-1558/paper45.pdf](http://ceur-ws.org/Vol-1558/paper45.pdf) -T. Giannakopoulos, I. Foufoulas, E. Stamatogiannakis, H. Dimitropoulos, N. Manola, and Y. Ioannidis. 2015. “Visual-Based Classification of Figures from Scientific Literature”. In Proceedings of the 24th International Conference on World Wide Web (WWW '15 Companion). Association for Computing Machinery, New York, NY, USA, 1059–1060. [doi:10.1145/2740908.2742024](https://doi.org/10.1145/2740908.2742024) +Giannakopoulos T., Foufoulas I., Stamatogiannakis E., Dimitropoulos H., Manola N., Ioannidis Y. (2015). "Visual-Based Classification of Figures from Scientific Literature", In Proceedings of the 24th International Conference on World Wide Web (WWW), Association for Computing Machinery, New York, NY, USA, 1059–1060. [doi:10.1145/2740908.2742024](https://doi.org/10.1145/2740908.2742024) -Giannakopoulos, T., Foufoulas, I., Stamatogiannakis, E., Dimitropoulos, H., Manola, N., & Ioannidis, Y. (2014). “Discovering and Visualizing Interdisciplinary Content Classes in Scientific Publications”. D-Lib Mag., Volume 20, Number 11/12. [doi:10.1045/november14-giannakopoulos](https://doi.org/10.1045/november14-giannakopoulos) +Giannakopoulos T., Foufoulas I., Stamatogiannakis E., Dimitropoulos H., Manola N., Ioannidis Y. (2014). "Discovering and Visualizing Interdisciplinary Content Classes in Scientific Publications". D-Lib Mag., Volume 20, Number 11/12. [doi:10.1045/november14-giannakopoulos](https://doi.org/10.1045/november14-giannakopoulos) -Giannakopoulos T., Stamatogiannakis E., Foufoulas I., Dimitropoulos H., Manola N., Ioannidis Y. (2014) “Content Visualization of Scientific Corpora Using an Extensible Relational Database Implementation”. In: Bolikowski Ł., Casarosa V., Goodale P., Houssos N., Manghi P., Schirrwagen J. 
(eds) Theory and Practice of Digital Libraries -- TPDL 2013 Selected Workshops. TPDL 2013. Communications in Computer and Information Science, vol 416. Springer, Cham. [doi:10.1007/978-3-319-08425-1_10](https://doi.org/10.1007/978-3-319-08425-1_10) +Giannakopoulos T., Stamatogiannakis E., Foufoulas I., Dimitropoulos H., Manola N., Ioannidis Y. (2014). "Content Visualization of Scientific Corpora Using an Extensible Relational Database Implementation", International Conference on Theory and Practice of Digital Libraries (TPDL), Springer, Cham. [doi:10.1007/978-3-319-08425-1_10](https://doi.org/10.1007/978-3-319-08425-1_10) -Giannakopoulos T., Dimitropoulos H., Metaxas O., Manola N., Ioannidis Y. (2013) “Supervised Content Visualization of Scientific Publications: A Case Study on the ArXiv Dataset”. In: Kłopotek M.A., Koronacki J., Marciniak M., Mykowiecka A., Wierzchoń S.T. (eds) Language Processing and Intelligent Information Systems. IIS 2013. Lecture Notes in Computer Science, vol 7912. Springer, Berlin, Heidelberg. [doi:10.1007/978-3-642-38634-3_23](https://doi.org/10.1007/978-3-642-38634-3_23) +Giannakopoulos T., Dimitropoulos H., Metaxas O., Manola N., Ioannidis Y. (2013). "Supervised Content Visualization of Scientific Publications: A Case Study on the ArXiv Dataset", Intelligent Information Systems Symposium (IIS) vol 7912, Springer, Berlin, Heidelberg. [doi:10.1007/978-3-642-38634-3_23](https://doi.org/10.1007/978-3-642-38634-3_23) -Tkaczyk, D., Szostek, P., Fedoryszak, M. et al. "CERMINE: automatic extraction of structured metadata from scientific literature". IJDAR 18, 317–335 (2015). [doi:10.1007/s10032-015-0249-8](https://doi.org/10.1007/s10032-015-0249-8) +Tkaczyk D., Szostek P., Fedoryszak M., Dendek P. J., Bolikowski Ł. (2015). "CERMINE: automatic extraction of structured metadata from scientific literature", International Journal on Document Analysis and Recognition (IJDAR), 18, 317–335.
[doi:10.1007/s10032-015-0249-8](https://doi.org/10.1007/s10032-015-0249-8) -M. Kobos, Ł. Bolikowski, M. Horst, P. Manghi, N. Manola, J. Schirrwagen (2014) “Information inference in scholarly communication infrastructures: the OpenAIREplus project experience”, Procedia Computer Science 38, 92-99. [doi:10.1016/j.procs.2014.10.016](https://doi.org/10.1016/j.procs.2014.10.016) +Kobos M., Bolikowski Ł., Horst M., Manghi P., Manola N., Schirrwagen J. (2014). "Information inference in scholarly communication infrastructures: the OpenAIREplus project experience", Procedia Computer Science 38, 92-99. [doi:10.1016/j.procs.2014.10.016](https://doi.org/10.1016/j.procs.2014.10.016) ### Portals -Baglioni M. et al. (2019) "The OpenAIRE Research Community Dashboard: On Blending Scientific Workflows and Scientific Publishing". In: Doucet A., Isaac A., Golub K., Aalberg T., Jatowt A. (eds) Digital Libraries for Open Knowledge. TPDL 2019. Lecture Notes in Computer Science, vol 11799. Springer, Cham. [doi:10.1007/978-3-030-30760-8_5](https://doi.org/10.1007/978-3-030-30760-8_5) +Baglioni M., Bardi A., Kokogiannaki A., Manghi P., Iatropoulou K., Principe P., Vieira A., Nielsen L. H., Dimitropoulos H., Foufoulas I., Manola N., Atzori C., La Bruzzo S., Lazzeri E., Artini M., De Bonis M., Dell’Amico A. (2019). "The OpenAIRE Research Community Dashboard: On Blending Scientific Workflows and Scientific Publishing", +International Conference on Theory and Practice of Digital Libraries (TPDL). Lecture Notes in Computer Science, vol 11799. Springer, Cham. [doi:10.1007/978-3-030-30760-8_5](https://doi.org/10.1007/978-3-030-30760-8_5) ### Broker Service -Manghi, P., Atzori, C., Bardi, A., La Bruzzo, S., & Artini, M. (2016, February). "Realizing a Scalable and History-Aware Literature Broker Service for OpenAIRE". In Italian Research Conference on Digital Libraries (pp. 92-103). Springer, Cham.
[doi:10.1007/978-3-319-56300-8_9](https://doi.org/10.1007/978-3-319-56300-8_9) +Manghi P., Atzori C., Bardi A., La Bruzzo S., Artini M. (2016). "Realizing a Scalable and History-Aware Literature Broker Service for OpenAIRE", Italian Research Conference on Digital Libraries (IRCDL), pp. 92-103, Springer, Cham. [doi:10.1007/978-3-319-56300-8_9](https://doi.org/10.1007/978-3-319-56300-8_9) -Artini, M., Atzori, C., Bardi, A., La Bruzzo, S., Manghi, P., & Mannocci, A. (2015). "The OpenAIRE literature broker service for institutional repositories". D-Lib Magazine, 21(11/12), 1. [doi:10.1045/november2015-artini](https://doi.org/10.1045/november2015-artini) +Artini M., Atzori C., Bardi A., La Bruzzo S., Manghi P., Mannocci A. (2015). "The OpenAIRE literature broker service for institutional repositories", D-Lib Magazine, 21(11/12), 1. [doi:10.1045/november2015-artini](https://doi.org/10.1045/november2015-artini) diff --git a/docs/services.md b/docs/services.md deleted file mode 100644 index ee763ea..0000000 --- a/docs/services.md +++ /dev/null @@ -1,20 +0,0 @@ ---- -sidebar_position: 8 ---- - -# Graph-based services - -## Explore -TODO - -## Provide -TODO - -## Connect -TODO - -## Monitor -TODO - -## Develop -TODO diff --git a/docusaurus.config.js b/docusaurus.config.js index e49bbc8..3254a5c 100644 --- a/docusaurus.config.js +++ b/docusaurus.config.js @@ -5,14 +5,23 @@ const lightCodeTheme = require('prism-react-renderer/themes/github'); const darkCodeTheme = require('prism-react-renderer/themes/dracula'); const math = require('remark-math'); const katex = require('rehype-katex'); -const { filterItems } = require('./sidebar-utils'); +const dotenv = require('dotenv'); + +// load env variables (see .env file) +const env = dotenv.config(); +if (env.error) { + throw env.error; +} + +console.info("ENV VARIABLES:"); +console.info(env.parsed); /** @type {import('@docusaurus/types').Config} */ const config = { - title: 'OpenAIRE Documentation', + title: 'OpenAIRE Research Graph 
Documentation', tagline: 'Open Access Infrastructure for Research in Europe', - url: 'http://snf-23385.ok-kno.grnetcloud.net', - baseUrl: '/', // serve the website at route + url: process.env.URL, + baseUrl: process.env.BASE_URL, // serve the website at route onBrokenLinks: 'throw', onBrokenMarkdownLinks: 'warn', favicon: 'img/favicon.ico', @@ -29,7 +38,19 @@ const config = { defaultLocale: 'en', locales: ['en'], }, - + themes: [ + [ + require.resolve("@easyops-cn/docusaurus-search-local"), + /** @type {import("@easyops-cn/docusaurus-search-local").PluginOptions} */ + ({ + language: ["en"], + indexBlog: false, + highlightSearchTermsOnTargetPage: true, + searchBarShortcutHint: false, + docsRouteBasePath: "/", + }), + ], + ], presets: [ [ 'classic', @@ -37,18 +58,7 @@ const config = { ({ docs: { routeBasePath: '/', // serve the docs at the site's route - sidebarPath: require.resolve('./sidebars.js'), - async sidebarItemsGenerator({ defaultSidebarItemsGenerator, ...args }) { - const sidebarItems = await defaultSidebarItemsGenerator(args); - - const itemsToFilterOut = [ - 'data-model/entities/entity-identifiers', - 'data-model/entities/other' - ]; - - return filterItems(sidebarItems, itemsToFilterOut); - }, // Please change this to your repo. // Remove this to remove the "edit this page" links. 
// editUrl: @@ -63,6 +73,12 @@ const config = { // }, theme: { customCss: require.resolve('./src/css/custom.css'), + }, + sitemap: { + changefreq: 'monthly', + priority: 0.5, + ignorePatterns: ['/tags/**'], + filename: 'sitemap.xml', }, }), ], @@ -81,98 +97,45 @@ const config = { /** @type {import('@docusaurus/preset-classic').ThemeConfig} */ ({ navbar: { - // title: 'OpenAIRE Documentation', + title: 'documentation', logo: { alt: 'OpenAIRE', src: 'img/logo.png', }, items: [ - { - type: 'doc', - docId: 'intro', - position: 'left', - label: 'Research graph v5.0', - }, - // - // documentation version in the navbar // { - // type: 'docsVersionDropdown', - // position: 'right' + // type: 'doc', + // docId: 'intro', + // position: 'left', + // label: 'Research graph v5.0', // }, // + // documentation version in the navbar + { + type: 'docsVersionDropdown', + position: 'right' + }, + // link to blog, the blog must be enabled first // {to: '/blog', label: 'Blog', position: 'left'}, - // + // link to github repo // { // href: 'https://github.com/facebook/docusaurus', - // label: 'GitHub', + // label: 'Issues', // position: 'right', // }, ], }, footer: { - style: 'dark', - links: [ - { - title: 'Docs', - items: [ - { - label: 'Research Graph', - to: '/', - }, - ], - }, - { - title: 'Dashboards', - items: [ - { - label: 'Explore', - href: 'https://explore.openaire.eu/', - }, - { - label: 'Provide', - href: 'https://provide.openaire.eu/', - }, - { - label: 'Connect', - href: 'https://connect.openaire.eu/', - }, - { - label: 'Monitor', - href: 'https://monitor.openaire.eu/', - }, - { - label: 'Develop', - href: 'https://graph.openaire.eu/', - }, - ], - }, - { - title: 'Community', - items: [ - { - label: 'Facebook', - href: 'http://www.facebook.com/groups/openaire/' - }, - { - label: 'Linkedin', - href: 'https://www.linkedin.com/company/openaire-eu/', - }, - { - label: 'Twitter', - href: 'https://twitter.com/OpenAIRE_eu', - }, - { - label: 'Youtube', - href: 
'https://www.youtube.com/channel/UChFYqizc-S6asNjQSoWuwjw', - }, - ], - }, - - ], + style: 'light', copyright: `Copyright © ${new Date().getFullYear()} OpenAIRE`, }, + colorMode: { + defaultMode: 'light', + disableSwitch: true, + respectPrefersColorScheme: false, + }, prism: { theme: lightCodeTheme, darkTheme: darkCodeTheme, diff --git a/package-lock.json b/package-lock.json index 7203b12..9745a9c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,8 +10,10 @@ "dependencies": { "@docusaurus/core": "^2.2.0", "@docusaurus/preset-classic": "^2.2.0", + "@easyops-cn/docusaurus-search-local": "^0.33.6", "@mdx-js/react": "^1.6.22", "clsx": "^1.2.1", + "dotenv": "^16.0.3", "hast-util-is-element": "^1.1.0", "prism-react-renderer": "^1.3.5", "react": "^17.0.2", @@ -2554,6 +2556,46 @@ "node": ">=16.14" } }, + "node_modules/@easyops-cn/autocomplete.js": { + "version": "0.38.1", + "resolved": "https://registry.npmjs.org/@easyops-cn/autocomplete.js/-/autocomplete.js-0.38.1.tgz", + "integrity": "sha512-drg76jS6syilOUmVNkyo1c7ZEBPcPuK+aJA7AksM5ZIIbV57DMHCywiCr+uHyv8BE5jUTU98j/H7gVrkHrWW3Q==", + "dependencies": { + "cssesc": "^3.0.0", + "immediate": "^3.2.3" + } + }, + "node_modules/@easyops-cn/docusaurus-search-local": { + "version": "0.33.6", + "resolved": "https://registry.npmjs.org/@easyops-cn/docusaurus-search-local/-/docusaurus-search-local-0.33.6.tgz", + "integrity": "sha512-3UqsJ42akhHDSlW9SravWQF5ZUI5VEKlzt50djbRRjDdJGefe1v9FpBKVdqHtPPFefcJoagtpfK4/R3dtDeEhw==", + "dependencies": { + "@docusaurus/plugin-content-docs": "^2.0.0-rc.1", + "@docusaurus/theme-translations": "^2.0.0-rc.1", + "@docusaurus/utils": "^2.0.0-rc.1", + "@docusaurus/utils-common": "^2.0.0-rc.1", + "@docusaurus/utils-validation": "^2.0.0-rc.1", + "@easyops-cn/autocomplete.js": "^0.38.1", + "@node-rs/jieba": "^1.6.0", + "cheerio": "^1.0.0-rc.3", + "clsx": "^1.1.1", + "debug": "^4.2.0", + "fs-extra": "^10.0.0", + "klaw-sync": "^6.0.0", + "lunr": "^2.3.9", + "lunr-languages": "^1.4.0", + "mark.js": 
"^8.11.1", + "tslib": "^2.4.0" + }, + "engines": { + "node": ">=12" + }, + "peerDependencies": { + "@docusaurus/theme-common": "^2.0.0-rc.1", + "react": "^16.14.0 || ^17.0.0 || ^18.0.0", + "react-dom": "^16.14.0 || ^17.0.0 || ^18.0.0" + } + }, "node_modules/@hapi/hoek": { "version": "9.3.0", "resolved": "https://registry.npmjs.org/@hapi/hoek/-/hoek-9.3.0.tgz", @@ -2761,6 +2803,228 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/@node-rs/jieba": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba/-/jieba-1.6.1.tgz", + "integrity": "sha512-pISKu8NIYKRvZp7mhYZYA8VCjJMqTsCe+mQcFFnAi3GNJsijGjef2peMFeDcvP72X8MsnNeYeg3rHkAybtefyQ==", + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + }, + "optionalDependencies": { + "@node-rs/jieba-android-arm-eabi": "1.6.1", + "@node-rs/jieba-android-arm64": "1.6.1", + "@node-rs/jieba-darwin-arm64": "1.6.1", + "@node-rs/jieba-darwin-x64": "1.6.1", + "@node-rs/jieba-freebsd-x64": "1.6.1", + "@node-rs/jieba-linux-arm-gnueabihf": "1.6.1", + "@node-rs/jieba-linux-arm64-gnu": "1.6.1", + "@node-rs/jieba-linux-arm64-musl": "1.6.1", + "@node-rs/jieba-linux-x64-gnu": "1.6.1", + "@node-rs/jieba-linux-x64-musl": "1.6.1", + "@node-rs/jieba-win32-arm64-msvc": "1.6.1", + "@node-rs/jieba-win32-ia32-msvc": "1.6.1", + "@node-rs/jieba-win32-x64-msvc": "1.6.1" + } + }, + "node_modules/@node-rs/jieba-android-arm-eabi": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-android-arm-eabi/-/jieba-android-arm-eabi-1.6.1.tgz", + "integrity": "sha512-R1YQfsPr7sK3Tq1sM0//6lNAGJK9RnMT0ShITT+7EJYr5OufUBb38lf/mRhrLxR0NF1pycEsMjdCAwrWrHd8rA==", + "cpu": [ + "arm" + ], + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@node-rs/jieba-android-arm64": { + "version": "1.6.1", + "resolved": 
"https://registry.npmjs.org/@node-rs/jieba-android-arm64/-/jieba-android-arm64-1.6.1.tgz", + "integrity": "sha512-hBRbj2uLmRFYDw2lWppTAPoyjeXkBKUT84h4fHUQj7CMU94Gc1IWkE4ocCqhvUhbaUXlCpocS9mB0/fc2641bw==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@node-rs/jieba-darwin-arm64": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-darwin-arm64/-/jieba-darwin-arm64-1.6.1.tgz", + "integrity": "sha512-GeoDe7XVTF6z8JUtD98QvwudsMaHV5EBXs5uO43SobeIkShH3Nujq5gLMD5kWoJXTxDrTgJe4wT42EwUaBEH2Q==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@node-rs/jieba-darwin-x64": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-darwin-x64/-/jieba-darwin-x64-1.6.1.tgz", + "integrity": "sha512-ENHYIS8b8JdMaUXEm0f8Y3+sHXu2UdukG1D/XGUNx+q5cn07HbwIg6L0tlGhE8dw4AhqoWHsExVaZ241Igh4iA==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@node-rs/jieba-freebsd-x64": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-freebsd-x64/-/jieba-freebsd-x64-1.6.1.tgz", + "integrity": "sha512-chwB/9edtxqS8Jm3j4RMaJjH9AlXmijUgKv02oMw36e77HKpko+tENUN25Vrn/9GKsKGqIPeXpmCjeXCN1HVQA==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@node-rs/jieba-linux-arm-gnueabihf": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-linux-arm-gnueabihf/-/jieba-linux-arm-gnueabihf-1.6.1.tgz", + "integrity": "sha512-tsb5fMGj4p8bHGfkf7bJ+HE2jxaixLTp3YnGg5D+kp8+HQRq8cp3ScG5cn8cq0phnJS/zfAp8rVfWInDagzKKQ==", + "cpu": [ + "arm" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@node-rs/jieba-linux-arm64-gnu": { + "version": 
"1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-linux-arm64-gnu/-/jieba-linux-arm64-gnu-1.6.1.tgz", + "integrity": "sha512-bSInORkJFfeZNR+i4rFoSZGbwkQtQlnZ0XfT/noTK9JUBDYErqQZPFjoaYAU45NWTk7p6Zkg30SuV1NTdWLaPw==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@node-rs/jieba-linux-arm64-musl": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-linux-arm64-musl/-/jieba-linux-arm64-musl-1.6.1.tgz", + "integrity": "sha512-qphL6xM7owfU8Hsh7GX73SDr/iApbnc+35mSLxbibAfCQnY89+WcBeWUUOSGM/Ov3VFaq4pyVlDFj0YjR01W2w==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@node-rs/jieba-linux-x64-gnu": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-linux-x64-gnu/-/jieba-linux-x64-gnu-1.6.1.tgz", + "integrity": "sha512-f6hhlrbi2wel0xZG7m3Wvksimt9MSu1f3aYO2Kwavf4qjMRZqJzLz9HlCJAal6AXB9Qgg+685P+gftsWve47qw==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@node-rs/jieba-linux-x64-musl": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-linux-x64-musl/-/jieba-linux-x64-musl-1.6.1.tgz", + "integrity": "sha512-cTVcdR6zWqpnmdEUyWEII9zfE5lTeWN53TbiOPx8TCA+291/31Vqd7GA8YEPndUO8qgCx5uShSDFStBAEIhYNQ==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@node-rs/jieba-win32-arm64-msvc": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-win32-arm64-msvc/-/jieba-win32-arm64-msvc-1.6.1.tgz", + "integrity": "sha512-YuOTrjHazDraXcGXRHgPQ53nyJuH8QtTCngYKjAzxsdt8uN+txb1AY69OLMLBBZqLTOwY9dgcW70vGiLQMCTeg==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + 
"node_modules/@node-rs/jieba-win32-ia32-msvc": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-win32-ia32-msvc/-/jieba-win32-ia32-msvc-1.6.1.tgz", + "integrity": "sha512-4+E843ImGpVlZ+LlT9E/13NHmmUg3UHQx419D6fFMorJUUQuK4cZJfE1z4tCgcrbV8S5Wew5LIFywlJeJLu0LQ==", + "cpu": [ + "ia32" + ], + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@node-rs/jieba-win32-x64-msvc": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-win32-x64-msvc/-/jieba-win32-x64-msvc-1.6.1.tgz", + "integrity": "sha512-veXNwm2VlseOzl7vaC7A/nZ4okp5/6edN7/Atj6mXnUbze/m/my5Rv5zUcW3U1D9VElnQ3srCHCa5vXljJuk6g==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, "node_modules/@nodelib/fs.scandir": { "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", @@ -5422,6 +5686,14 @@ "node": ">=8" } }, + "node_modules/dotenv": { + "version": "16.0.3", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.0.3.tgz", + "integrity": "sha512-7GO6HghkA5fYG9TYnNxi14/7K9f5occMlp3zXAuSxn7CKCxt9xbNWG7yF8hTCSUchlfWSe3uLmlPfigevRItzQ==", + "engines": { + "node": ">=12" + } + }, "node_modules/duplexer": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/duplexer/-/duplexer-0.1.2.tgz", @@ -6905,6 +7177,11 @@ "node": ">=14.0.0" } }, + "node_modules/immediate": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.3.0.tgz", + "integrity": "sha512-HR7EVodfFUdQCTIeySw+WDRFJlPcLOJbXfwwZ7Oom6tjsvZ3bOkCDJHehQC3nxJrv7+f9XecwazynjU8e4Vw3Q==" + }, "node_modules/immer": { "version": "9.0.15", "resolved": "https://registry.npmjs.org/immer/-/immer-9.0.15.tgz", @@ -7461,6 +7738,14 @@ "node": ">=0.10.0" } }, + "node_modules/klaw-sync": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/klaw-sync/-/klaw-sync-6.0.0.tgz", + "integrity": 
"sha512-nIeuVSzdCCs6TDPTqI8w1Yre34sSq7AkZ4B3sfOBbI2CgVSB4Du4aLQijFU2+lhAFCwt9+42Hel6lQNIv6AntQ==", + "dependencies": { + "graceful-fs": "^4.1.11" + } + }, "node_modules/kleur": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/kleur/-/kleur-3.0.3.tgz", @@ -7609,6 +7894,16 @@ "node": ">=10" } }, + "node_modules/lunr": { + "version": "2.3.9", + "resolved": "https://registry.npmjs.org/lunr/-/lunr-2.3.9.tgz", + "integrity": "sha512-zTU3DaZaF3Rt9rhN3uBMGQD3dD2/vFQqnvZCDv4dl5iOzq2IZQqTxu90r4E5J+nP70J3ilqVCrbho2eWaeW8Ow==" + }, + "node_modules/lunr-languages": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/lunr-languages/-/lunr-languages-1.10.0.tgz", + "integrity": "sha512-BBjKKcwrieJlzwwc9M5H/MRXGJ2qyOSDx/NXYiwkuKjiLOOoouh0WsDzeqcLoUWcX31y7i8sb8IgsZKObdUCkw==" + }, "node_modules/make-dir": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/make-dir/-/make-dir-3.1.0.tgz", @@ -7631,6 +7926,11 @@ "semver": "bin/semver.js" } }, + "node_modules/mark.js": { + "version": "8.11.1", + "resolved": "https://registry.npmjs.org/mark.js/-/mark.js-8.11.1.tgz", + "integrity": "sha512-1I+1qpDt4idfgLQG+BNWmrqku+7/2bi5nLf4YwF8y8zXvmfiTBY3PV3ZibfrjBueCByROpuBjLLFCajqkgYoLQ==" + }, "node_modules/markdown-escapes": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/markdown-escapes/-/markdown-escapes-1.0.4.tgz", @@ -14232,6 +14532,38 @@ "tslib": "^2.4.0" } }, + "@easyops-cn/autocomplete.js": { + "version": "0.38.1", + "resolved": "https://registry.npmjs.org/@easyops-cn/autocomplete.js/-/autocomplete.js-0.38.1.tgz", + "integrity": "sha512-drg76jS6syilOUmVNkyo1c7ZEBPcPuK+aJA7AksM5ZIIbV57DMHCywiCr+uHyv8BE5jUTU98j/H7gVrkHrWW3Q==", + "requires": { + "cssesc": "^3.0.0", + "immediate": "^3.2.3" + } + }, + "@easyops-cn/docusaurus-search-local": { + "version": "0.33.6", + "resolved": "https://registry.npmjs.org/@easyops-cn/docusaurus-search-local/-/docusaurus-search-local-0.33.6.tgz", + "integrity": 
"sha512-3UqsJ42akhHDSlW9SravWQF5ZUI5VEKlzt50djbRRjDdJGefe1v9FpBKVdqHtPPFefcJoagtpfK4/R3dtDeEhw==", + "requires": { + "@docusaurus/plugin-content-docs": "^2.0.0-rc.1", + "@docusaurus/theme-translations": "^2.0.0-rc.1", + "@docusaurus/utils": "^2.0.0-rc.1", + "@docusaurus/utils-common": "^2.0.0-rc.1", + "@docusaurus/utils-validation": "^2.0.0-rc.1", + "@easyops-cn/autocomplete.js": "^0.38.1", + "@node-rs/jieba": "^1.6.0", + "cheerio": "^1.0.0-rc.3", + "clsx": "^1.1.1", + "debug": "^4.2.0", + "fs-extra": "^10.0.0", + "klaw-sync": "^6.0.0", + "lunr": "^2.3.9", + "lunr-languages": "^1.4.0", + "mark.js": "^8.11.1", + "tslib": "^2.4.0" + } + }, "@hapi/hoek": { "version": "9.3.0", "resolved": "https://registry.npmjs.org/@hapi/hoek/-/hoek-9.3.0.tgz", @@ -14397,6 +14729,104 @@ "resolved": "https://registry.npmjs.org/@mdx-js/util/-/util-1.6.22.tgz", "integrity": "sha512-H1rQc1ZOHANWBvPcW+JpGwr+juXSxM8Q8YCkm3GhZd8REu1fHR3z99CErO1p9pkcfcxZnMdIZdIsXkOHY0NilA==" }, + "@node-rs/jieba": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba/-/jieba-1.6.1.tgz", + "integrity": "sha512-pISKu8NIYKRvZp7mhYZYA8VCjJMqTsCe+mQcFFnAi3GNJsijGjef2peMFeDcvP72X8MsnNeYeg3rHkAybtefyQ==", + "requires": { + "@node-rs/jieba-android-arm-eabi": "1.6.1", + "@node-rs/jieba-android-arm64": "1.6.1", + "@node-rs/jieba-darwin-arm64": "1.6.1", + "@node-rs/jieba-darwin-x64": "1.6.1", + "@node-rs/jieba-freebsd-x64": "1.6.1", + "@node-rs/jieba-linux-arm-gnueabihf": "1.6.1", + "@node-rs/jieba-linux-arm64-gnu": "1.6.1", + "@node-rs/jieba-linux-arm64-musl": "1.6.1", + "@node-rs/jieba-linux-x64-gnu": "1.6.1", + "@node-rs/jieba-linux-x64-musl": "1.6.1", + "@node-rs/jieba-win32-arm64-msvc": "1.6.1", + "@node-rs/jieba-win32-ia32-msvc": "1.6.1", + "@node-rs/jieba-win32-x64-msvc": "1.6.1" + } + }, + "@node-rs/jieba-android-arm-eabi": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-android-arm-eabi/-/jieba-android-arm-eabi-1.6.1.tgz", + "integrity": 
"sha512-R1YQfsPr7sK3Tq1sM0//6lNAGJK9RnMT0ShITT+7EJYr5OufUBb38lf/mRhrLxR0NF1pycEsMjdCAwrWrHd8rA==", + "optional": true + }, + "@node-rs/jieba-android-arm64": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-android-arm64/-/jieba-android-arm64-1.6.1.tgz", + "integrity": "sha512-hBRbj2uLmRFYDw2lWppTAPoyjeXkBKUT84h4fHUQj7CMU94Gc1IWkE4ocCqhvUhbaUXlCpocS9mB0/fc2641bw==", + "optional": true + }, + "@node-rs/jieba-darwin-arm64": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-darwin-arm64/-/jieba-darwin-arm64-1.6.1.tgz", + "integrity": "sha512-GeoDe7XVTF6z8JUtD98QvwudsMaHV5EBXs5uO43SobeIkShH3Nujq5gLMD5kWoJXTxDrTgJe4wT42EwUaBEH2Q==", + "optional": true + }, + "@node-rs/jieba-darwin-x64": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-darwin-x64/-/jieba-darwin-x64-1.6.1.tgz", + "integrity": "sha512-ENHYIS8b8JdMaUXEm0f8Y3+sHXu2UdukG1D/XGUNx+q5cn07HbwIg6L0tlGhE8dw4AhqoWHsExVaZ241Igh4iA==", + "optional": true + }, + "@node-rs/jieba-freebsd-x64": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-freebsd-x64/-/jieba-freebsd-x64-1.6.1.tgz", + "integrity": "sha512-chwB/9edtxqS8Jm3j4RMaJjH9AlXmijUgKv02oMw36e77HKpko+tENUN25Vrn/9GKsKGqIPeXpmCjeXCN1HVQA==", + "optional": true + }, + "@node-rs/jieba-linux-arm-gnueabihf": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-linux-arm-gnueabihf/-/jieba-linux-arm-gnueabihf-1.6.1.tgz", + "integrity": "sha512-tsb5fMGj4p8bHGfkf7bJ+HE2jxaixLTp3YnGg5D+kp8+HQRq8cp3ScG5cn8cq0phnJS/zfAp8rVfWInDagzKKQ==", + "optional": true + }, + "@node-rs/jieba-linux-arm64-gnu": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-linux-arm64-gnu/-/jieba-linux-arm64-gnu-1.6.1.tgz", + "integrity": "sha512-bSInORkJFfeZNR+i4rFoSZGbwkQtQlnZ0XfT/noTK9JUBDYErqQZPFjoaYAU45NWTk7p6Zkg30SuV1NTdWLaPw==", + "optional": true + }, + "@node-rs/jieba-linux-arm64-musl": { + "version": "1.6.1", + 
"resolved": "https://registry.npmjs.org/@node-rs/jieba-linux-arm64-musl/-/jieba-linux-arm64-musl-1.6.1.tgz", + "integrity": "sha512-qphL6xM7owfU8Hsh7GX73SDr/iApbnc+35mSLxbibAfCQnY89+WcBeWUUOSGM/Ov3VFaq4pyVlDFj0YjR01W2w==", + "optional": true + }, + "@node-rs/jieba-linux-x64-gnu": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-linux-x64-gnu/-/jieba-linux-x64-gnu-1.6.1.tgz", + "integrity": "sha512-f6hhlrbi2wel0xZG7m3Wvksimt9MSu1f3aYO2Kwavf4qjMRZqJzLz9HlCJAal6AXB9Qgg+685P+gftsWve47qw==", + "optional": true + }, + "@node-rs/jieba-linux-x64-musl": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-linux-x64-musl/-/jieba-linux-x64-musl-1.6.1.tgz", + "integrity": "sha512-cTVcdR6zWqpnmdEUyWEII9zfE5lTeWN53TbiOPx8TCA+291/31Vqd7GA8YEPndUO8qgCx5uShSDFStBAEIhYNQ==", + "optional": true + }, + "@node-rs/jieba-win32-arm64-msvc": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-win32-arm64-msvc/-/jieba-win32-arm64-msvc-1.6.1.tgz", + "integrity": "sha512-YuOTrjHazDraXcGXRHgPQ53nyJuH8QtTCngYKjAzxsdt8uN+txb1AY69OLMLBBZqLTOwY9dgcW70vGiLQMCTeg==", + "optional": true + }, + "@node-rs/jieba-win32-ia32-msvc": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-win32-ia32-msvc/-/jieba-win32-ia32-msvc-1.6.1.tgz", + "integrity": "sha512-4+E843ImGpVlZ+LlT9E/13NHmmUg3UHQx419D6fFMorJUUQuK4cZJfE1z4tCgcrbV8S5Wew5LIFywlJeJLu0LQ==", + "optional": true + }, + "@node-rs/jieba-win32-x64-msvc": { + "version": "1.6.1", + "resolved": "https://registry.npmjs.org/@node-rs/jieba-win32-x64-msvc/-/jieba-win32-x64-msvc-1.6.1.tgz", + "integrity": "sha512-veXNwm2VlseOzl7vaC7A/nZ4okp5/6edN7/Atj6mXnUbze/m/my5Rv5zUcW3U1D9VElnQ3srCHCa5vXljJuk6g==", + "optional": true + }, "@nodelib/fs.scandir": { "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", @@ -16363,6 +16793,11 @@ } } }, + "dotenv": { + "version": "16.0.3", + "resolved": 
"https://registry.npmjs.org/dotenv/-/dotenv-16.0.3.tgz", + "integrity": "sha512-7GO6HghkA5fYG9TYnNxi14/7K9f5occMlp3zXAuSxn7CKCxt9xbNWG7yF8hTCSUchlfWSe3uLmlPfigevRItzQ==" + }, "duplexer": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/duplexer/-/duplexer-0.1.2.tgz", @@ -17456,6 +17891,11 @@ "queue": "6.0.2" } }, + "immediate": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.3.0.tgz", + "integrity": "sha512-HR7EVodfFUdQCTIeySw+WDRFJlPcLOJbXfwwZ7Oom6tjsvZ3bOkCDJHehQC3nxJrv7+f9XecwazynjU8e4Vw3Q==" + }, "immer": { "version": "9.0.15", "resolved": "https://registry.npmjs.org/immer/-/immer-9.0.15.tgz", @@ -17830,6 +18270,14 @@ "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.3.tgz", "integrity": "sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw==" }, + "klaw-sync": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/klaw-sync/-/klaw-sync-6.0.0.tgz", + "integrity": "sha512-nIeuVSzdCCs6TDPTqI8w1Yre34sSq7AkZ4B3sfOBbI2CgVSB4Du4aLQijFU2+lhAFCwt9+42Hel6lQNIv6AntQ==", + "requires": { + "graceful-fs": "^4.1.11" + } + }, "kleur": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/kleur/-/kleur-3.0.3.tgz", @@ -17945,6 +18393,16 @@ "yallist": "^4.0.0" } }, + "lunr": { + "version": "2.3.9", + "resolved": "https://registry.npmjs.org/lunr/-/lunr-2.3.9.tgz", + "integrity": "sha512-zTU3DaZaF3Rt9rhN3uBMGQD3dD2/vFQqnvZCDv4dl5iOzq2IZQqTxu90r4E5J+nP70J3ilqVCrbho2eWaeW8Ow==" + }, + "lunr-languages": { + "version": "1.10.0", + "resolved": "https://registry.npmjs.org/lunr-languages/-/lunr-languages-1.10.0.tgz", + "integrity": "sha512-BBjKKcwrieJlzwwc9M5H/MRXGJ2qyOSDx/NXYiwkuKjiLOOoouh0WsDzeqcLoUWcX31y7i8sb8IgsZKObdUCkw==" + }, "make-dir": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/make-dir/-/make-dir-3.1.0.tgz", @@ -17960,6 +18418,11 @@ } } }, + "mark.js": { + "version": "8.11.1", + "resolved": 
"https://registry.npmjs.org/mark.js/-/mark.js-8.11.1.tgz", + "integrity": "sha512-1I+1qpDt4idfgLQG+BNWmrqku+7/2bi5nLf4YwF8y8zXvmfiTBY3PV3ZibfrjBueCByROpuBjLLFCajqkgYoLQ==" + }, "markdown-escapes": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/markdown-escapes/-/markdown-escapes-1.0.4.tgz", diff --git a/package.json b/package.json index e022bb7..527930d 100644 --- a/package.json +++ b/package.json @@ -16,8 +16,10 @@ "dependencies": { "@docusaurus/core": "^2.2.0", "@docusaurus/preset-classic": "^2.2.0", + "@easyops-cn/docusaurus-search-local": "^0.33.6", "@mdx-js/react": "^1.6.22", "clsx": "^1.2.1", + "dotenv": "^16.0.3", "hast-util-is-element": "^1.1.0", "prism-react-renderer": "^1.3.5", "react": "^17.0.2", diff --git a/release.properties b/release.properties new file mode 100644 index 0000000..dd216a4 --- /dev/null +++ b/release.properties @@ -0,0 +1,8 @@ +#The name of the tag +tag_name=1.1 +# A description of the tag +tag_description=1.1 is our 1st tag +#The release name +release_name=release-1.1 +#The release description +release_description=this is the release 1.1 \ No newline at end of file diff --git a/sidebar-utils.js b/sidebar-utils.js deleted file mode 100644 index ee69999..0000000 --- a/sidebar-utils.js +++ /dev/null @@ -1,18 +0,0 @@ -// filter out specific items from the sidebar -function filterItems(items, itemsToFilter) { - - // filter out items of categories - let result = items.map((item) => { - if (item.type === 'category') { - return {...item, items: filterItems(item.items, itemsToFilter)}; - } - return item; - }); - - // filter out items in current level - return result.filter( item => !itemsToFilter.includes(item.id) ); -} - -module.exports = { - filterItems -}; \ No newline at end of file diff --git a/sidebars.js b/sidebars.js index ba0f462..f799348 100644 --- a/sidebars.js +++ b/sidebars.js @@ -29,7 +29,7 @@ const sidebars = { label: "Entities", link: { type: 'generated-index', - description: 'The main entities of the OpenAIRE 
Graph are listed below.' + description: 'The main entities of the OpenAIRE Research Graph are listed below.' }, items: [ { type: 'doc', id: 'data-model/entities/result' }, @@ -51,12 +51,22 @@ const sidebars = { href: "https://graph.openaire.eu/develop/overview.html" }, { - type: 'doc', - id: 'download' - }, + type: 'category', + label: "Downloads", + link: { + type: 'generated-index', + description: 'All resources, available for download, are listed below.' + }, + items: [ + { type: 'doc', id: 'downloads/full-graph'}, + { type: 'doc', id: 'downloads/beginners-kit' }, + { type: 'doc', id: 'downloads/subgraphs' }, + { type: 'doc', id: 'downloads/related-datasets' }, + ] + }, { type: 'category', - label: "Data provision", + label: "Graph production workflow", link: {type: 'doc', id: 'data-provision/data-provision'}, items: [ { @@ -64,12 +74,46 @@ const sidebars = { label: "Aggregation", link: {type: 'doc', id: 'data-provision/aggregation/aggregation'}, items: [ - { type: 'doc', id: 'data-provision/aggregation/doiboost', label: 'DOIBoost' }, - { type: 'doc', id: 'data-provision/aggregation/pubmed' }, - { type: 'doc', id: 'data-provision/aggregation/datacite' }, - { type: 'doc', id: 'data-provision/aggregation/ebi', label: 'EMBL-EBI' }, + { + type: 'doc', + label: "OpenAIRE compatible sources", + id: 'data-provision/aggregation/compatible-sources', + }, + { + type: 'category', + label: "Non-compatible sources", + link: { type: 'generated-index' }, + items: [ + { type: 'doc', id: 'data-provision/aggregation/non-compatible-sources/doiboost', label: 'DOIBoost' }, + { type: 'doc', id: 'data-provision/aggregation/non-compatible-sources/pubmed' }, + { type: 'doc', id: 'data-provision/aggregation/non-compatible-sources/datacite' }, + { type: 'doc', id: 'data-provision/aggregation/non-compatible-sources/ebi', label: 'EMBL-EBI' }, + ] + } ] }, + { + type: 'doc', + id: 'data-provision/merge-by-id' + }, + { + type: 'category', + label: "Enrichment by mining", + link: { + type: 
'generated-index', + description: 'The OpenAIRE Research Graph is enriched using the different Text and Data Mining (TDM) algorithms that are grouped in the following categories.' + }, + items: [ + { type: 'doc', id: 'data-provision/enrichment-by-mining/affiliation_matching' }, + { type: 'doc', id: 'data-provision/enrichment-by-mining/citation_matching' }, + { type: 'doc', id: 'data-provision/enrichment-by-mining/classifies' }, + { type: 'doc', id: 'data-provision/enrichment-by-mining/documents_similarity' }, + { type: 'doc', id: 'data-provision/enrichment-by-mining/acks' }, + { type: 'doc', id: 'data-provision/enrichment-by-mining/cites' }, + { type: 'doc', id: 'data-provision/enrichment-by-mining/metadata_extraction' }, + ] + }, + { type: 'doc', id: 'data-provision/cleaning' }, { type: 'category', label: "Deduplication", @@ -80,41 +124,36 @@ const sidebars = { ] }, { - type: 'category', - label: "Enrichment", - link: {type: 'doc', id: 'data-provision/enrichment/enrichment'}, + type: 'category', + label: "Deduction & propagation", + link: { + type: 'generated-index' , + description: 'The OpenAIRE Research Graph is further enriched by the deduction and propagation processes described in this section.' 
+ + }, items: [ - { - type: 'category', - label: "Mining algorithms", - link: { - type: 'generated-index', - description: 'The Text and Data Mining (TDM) algorithms used for enriching the OpenAIRE Graph are grouped in the following main categories:' - }, - items: [ - { type: 'doc', id: 'data-provision/enrichment/affiliation_matching' }, - { type: 'doc', id: 'data-provision/enrichment/citation_matching' }, - { type: 'doc', id: 'data-provision/enrichment/classifies' }, - { type: 'doc', id: 'data-provision/enrichment/documents_similarity' }, - { type: 'doc', id: 'data-provision/enrichment/acks' }, - - { type: 'doc', id: 'data-provision/enrichment/cites' }, - - { type: 'doc', id: 'data-provision/enrichment/metadata_extraction' }, - ] - }, - { type: 'doc', id: 'data-provision/enrichment/impact-scores' }, + { type: 'doc', id: 'data-provision/deduction-and-propagation/bulk-tagging' }, + { type: 'doc', id: 'data-provision/deduction-and-propagation/propagation' }, ] }, - { type: 'doc', id: 'data-provision/post-cleaning' }, + { + type: 'category', + label: "Indicators ingestion", + link: { + type: 'generated-index' , + description: 'In this step, the following types of indicators are ingested in the OpenAIRE Research Graph.' 
+ + }, + items: [ + { type: 'doc', id: 'data-provision/indicators-ingestion/impact-scores' }, + { type: 'doc', id: 'data-provision/indicators-ingestion/usage-counts' }, + ] + }, + { type: 'doc', id: 'data-provision/finalisation' }, { type: 'doc', id: 'data-provision/indexing' }, - { type: 'doc', id: 'data-provision/stats' }, + { type: 'doc', id: 'data-provision/stats' } ] }, - { - type: 'doc', - id: 'services' - }, { type: "link", label: "Learning center", @@ -125,10 +164,10 @@ const sidebars = { id: 'publications', label: "Relevant publications" }, - { - type: 'doc', - id: 'faq' - }, + // { + // type: 'doc', + // id: 'faq' + // }, { type: 'doc', id: 'license' diff --git a/src/css/custom.css b/src/css/custom.css index 27cefe3..b6455c2 100644 --- a/src/css/custom.css +++ b/src/css/custom.css @@ -5,58 +5,66 @@ */ /* You can override the default Infima variables here. */ -/* -:root { - --ifm-color-primary: #2e8555; - --ifm-color-primary-dark: #29784c; - --ifm-color-primary-darker: #277148; - --ifm-color-primary-darkest: #205d3b; - --ifm-color-primary-light: #33925d; - --ifm-color-primary-lighter: #359962; - --ifm-color-primary-lightest: #3cad6e; - --ifm-code-font-size: 95%; - --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.1); -} -*/ - -/* For readability concerns, you should choose a lighter palette in dark mode. 
*/ -/* -[data-theme='dark'] { - --ifm-color-primary: #25c2a0; - --ifm-color-primary-dark: #21af90; - --ifm-color-primary-darker: #1fa588; - --ifm-color-primary-darkest: #1a8870; - --ifm-color-primary-light: #29d5b0; - --ifm-color-primary-lighter: #32d8b4; - --ifm-color-primary-lightest: #4fddbf; - --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.3); -} -*/ :root { - --ifm-color-primary: #4666ca; - --ifm-color-primary-dark: #3757be; - --ifm-color-primary-darker: #3353b4; - --ifm-color-primary-darkest: #2a4494; - --ifm-color-primary-light: #5b77d0; - --ifm-color-primary-lighter: #6680d3; - --ifm-color-primary-lightest: #859adc; + --ifm-color-primary: #e6122e; + --ifm-color-primary-dark: #cf1029; + --ifm-color-primary-darker: #c30f27; + --ifm-color-primary-darkest: #a10d20; + --ifm-color-primary-light: #ee233e; + --ifm-color-primary-lighter: #ef2f48; + --ifm-color-primary-lightest: #f15166; + --ifm-background-color: #F5F5F5; + --ifm-navbar-background-color: #fff; --ifm-code-font-size: 95%; --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.1); } [data-theme='dark'] { - --ifm-color-primary: #5dade2; - --ifm-color-primary-dark: #429fdd; - --ifm-color-primary-darker: #3498db; - --ifm-color-primary-darkest: #227fbd; - --ifm-color-primary-light: #78bbe7; - --ifm-color-primary-lighter: #86c2e9; - --ifm-color-primary-lightest: #aed6f1; + --ifm-color-primary: #f15166; + --ifm-color-primary-dark: #ef334c; + --ifm-color-primary-darker: #ed243f; + --ifm-color-primary-darkest: #d1112a; + --ifm-color-primary-light: #f36f80; + --ifm-color-primary-lighter: #f57e8d; + --ifm-color-primary-lightest: #f8aab5; + --ifm-background-color: #2c2e3a; + --ifm-navbar-background-color: #2c2e3a; --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.3); } +.navbar__logo { + height: 2.5rem; +} .todo { background-color: yellow; -} \ No newline at end of file +} + +@media (min-width: 996px) { + + .left-badge { + padding-right: 5px; + } + + .mid-badge { + padding-left: 0; + padding-right: 
5px; + } + + .right-badge { + padding-left: 0; + } +} + +.dark-badge { + background-color: #c6c6c6; +} + +.footer { + background-color: var(--ifm-navbar-background-color); + padding-bottom: 2em; + padding-top: 1em; + height: var(--ifm-navbar-height); +} + diff --git a/static/bibtex/OpenAIRE_Research_Graph_dump.bib b/static/bibtex/OpenAIRE_Research_Graph_dump.bib new file mode 100644 index 0000000..2e8f5d3 --- /dev/null +++ b/static/bibtex/OpenAIRE_Research_Graph_dump.bib @@ -0,0 +1,33 @@ +@dataset{manghi_paolo_2022_6616871, + author = {Manghi, Paolo and + Atzori, Claudio and + Bardi, Alessia and + Baglioni, Miriam and + Schirrwagen, Jochen and + Dimitropoulos, Harry and + La Bruzzo, Sandro and + Foufoulas, Ioannis and + Mannocci, Andrea and + Horst, Marek and + Czerniak, Andreas and + Kiatropoulou, Katerina and + Kokogiannaki, Argiro and + De Bonis, Michele and + Artini, Michele and + Ottonello, Enrico and + Lempesis, Antonis and + Ioannidis, Alexandros and + Manola, Natalia and + Principe, Pedro}, + title = {OpenAIRE Research Graph Dump}, + month = Jun, + year = 2022, + note = {{A new version of this dataset is published every 6 + months. 
The content available on the OpenAIRE + EXPLORE and CONNECT portals might be more up-to- + date with respect to the data you find here.}}, + publisher = {Zenodo}, + version = {4.1}, + doi = {10.5281/zenodo.6616871}, + url = {https://doi.org/10.5281/zenodo.6616871} +} \ No newline at end of file diff --git a/static/img/docs/aggregation.png b/static/img/docs/aggregation.png deleted file mode 100644 index bd6dd19..0000000 Binary files a/static/img/docs/aggregation.png and /dev/null differ diff --git a/static/img/docs/architecture.png b/static/img/docs/architecture.png deleted file mode 100644 index 8db82ef..0000000 Binary files a/static/img/docs/architecture.png and /dev/null differ diff --git a/static/img/docs/decisiontree-dataset-orp.png b/static/img/docs/decisiontree-dataset-orp.png deleted file mode 100644 index cf12130..0000000 Binary files a/static/img/docs/decisiontree-dataset-orp.png and /dev/null differ diff --git a/static/img/docs/decisiontree-organization.png b/static/img/docs/decisiontree-organization.png deleted file mode 100644 index c3a2a56..0000000 Binary files a/static/img/docs/decisiontree-organization.png and /dev/null differ diff --git a/static/img/docs/decisiontree-publication.png b/static/img/docs/decisiontree-publication.png deleted file mode 100644 index aa70343..0000000 Binary files a/static/img/docs/decisiontree-publication.png and /dev/null differ diff --git a/static/img/docs/decisiontree-software.png b/static/img/docs/decisiontree-software.png deleted file mode 100644 index 23c6812..0000000 Binary files a/static/img/docs/decisiontree-software.png and /dev/null differ diff --git a/static/img/docs/dedup-results.png b/static/img/docs/dedup-results.png deleted file mode 100644 index d8fdda2..0000000 Binary files a/static/img/docs/dedup-results.png and /dev/null differ diff --git a/static/img/docs/deduplication-workflow.png b/static/img/docs/deduplication-workflow.png deleted file mode 100644 index ae26ca2..0000000 Binary files 
a/static/img/docs/deduplication-workflow.png and /dev/null differ diff --git a/static/img/docusaurus.png b/static/img/docusaurus.png deleted file mode 100644 index f458149..0000000 Binary files a/static/img/docusaurus.png and /dev/null differ diff --git a/static/img/logo.png b/static/img/logo.png index aeffc3d..f50763e 100644 Binary files a/static/img/logo.png and b/static/img/logo.png differ diff --git a/static/img/undraw_docusaurus_mountain.svg b/static/img/undraw_docusaurus_mountain.svg deleted file mode 100644 index af961c4..0000000 --- a/static/img/undraw_docusaurus_mountain.svg +++ /dev/null @@ -1,171 +0,0 @@ - diff --git a/static/img/undraw_docusaurus_react.svg b/static/img/undraw_docusaurus_react.svg deleted file mode 100644 index 94b5cf0..0000000 --- a/static/img/undraw_docusaurus_react.svg +++ /dev/null @@ -1,170 +0,0 @@ - diff --git a/static/img/undraw_docusaurus_tree.svg b/static/img/undraw_docusaurus_tree.svg deleted file mode 100644 index d9161d3..0000000 --- a/static/img/undraw_docusaurus_tree.svg +++ /dev/null @@ -1,40 +0,0 @@ -