diff --git a/.env b/.env index a763c08..9ed2036 100644 --- a/.env +++ b/.env @@ -1,2 +1,2 @@ URL="http://snf-23385.ok-kno.grnetcloud.net" -BASE_URL="/docs" +BASE_URL="/" diff --git a/docs/data-model/entities/other.md b/docs/data-model/entities/other.md index cd12f18..adb9d97 100644 --- a/docs/data-model/entities/other.md +++ b/docs/data-model/entities/other.md @@ -647,11 +647,11 @@ A measure computed for this instance (e.g. those provided by [BIP! Finder](https _Type: String • Cardinality: ONE_ The specified measure. Currently supported one of: -* `influence` (see [PageRank](/data-provision/enrichment/impact-scores#pagerank-pr)) -* `influence_alt` (see [Citation Count](/data-provision/enrichment/impact-scores#citation-count-cc)) -* `popularity` (see [AttRank](/data-provision/enrichment/impact-scores#attrank)) -* `popularity_alt` (see [RAM](/data-provision/enrichment/impact-scores#ram)) -* `impulse` (see ["Incubation" Citation Count](/data-provision/enrichment/impact-scores#incubation-citation-count-icc)) +* `influence` (see [PageRank](/data-provision/indicators-ingestion/impact-scores#pagerank-pr)) +* `influence_alt` (see [Citation Count](/data-provision/indicators-ingestion/impact-scores#citation-count-cc)) +* `popularity` (see [AttRank](/data-provision/indicators-ingestion/impact-scores#attrank)) +* `popularity_alt` (see [RAM](/data-provision/indicators-ingestion/impact-scores#ram)) +* `impulse` (see ["Incubation" Citation Count](/data-provision/indicators-ingestion/impact-scores#incubation-citation-count-icc)) ```json "key": "influence" diff --git a/docs/data-provision/aggregation/compatible-sources.md b/docs/data-provision/aggregation/compatible-sources.md new file mode 100644 index 0000000..3d6ac0b --- /dev/null +++ b/docs/data-provision/aggregation/compatible-sources.md @@ -0,0 +1,5 @@ +--- +sidebar_position: 1 +--- + +# OpenAIRE compatible sources \ No newline at end of file diff --git a/docs/data-provision/aggregation/datacite.md 
b/docs/data-provision/aggregation/non-compatible-sources/datacite.md similarity index 100% rename from docs/data-provision/aggregation/datacite.md rename to docs/data-provision/aggregation/non-compatible-sources/datacite.md diff --git a/docs/data-provision/aggregation/doiboost.md b/docs/data-provision/aggregation/non-compatible-sources/doiboost.md similarity index 100% rename from docs/data-provision/aggregation/doiboost.md rename to docs/data-provision/aggregation/non-compatible-sources/doiboost.md diff --git a/docs/data-provision/aggregation/ebi.md b/docs/data-provision/aggregation/non-compatible-sources/ebi.md similarity index 100% rename from docs/data-provision/aggregation/ebi.md rename to docs/data-provision/aggregation/non-compatible-sources/ebi.md diff --git a/docs/data-provision/aggregation/pubmed.md b/docs/data-provision/aggregation/non-compatible-sources/pubmed.md similarity index 100% rename from docs/data-provision/aggregation/pubmed.md rename to docs/data-provision/aggregation/non-compatible-sources/pubmed.md diff --git a/docs/data-provision/cleaning.md b/docs/data-provision/cleaning.md new file mode 100644 index 0000000..81b62dd --- /dev/null +++ b/docs/data-provision/cleaning.md @@ -0,0 +1 @@ +# Cleaning \ No newline at end of file diff --git a/docs/data-provision/data-provision.md b/docs/data-provision/data-provision.md index ea103c3..95cd3bc 100644 --- a/docs/data-provision/data-provision.md +++ b/docs/data-provision/data-provision.md @@ -1,7 +1,7 @@ -# Data provision +# Graph production workflow OpenAIRE collects metadata records from more than 70K scholarly communication sources from all over the world, including Open Access institutional repositories, data archives, journals. All the metadata records (i.e. descriptions of research products) are put together in a data lake, together with records from Crossref, Unpaywall, ORCID, Grid.ac, and information about projects provided by national and international funders. 
Dedicated inference algorithms applied to metadata and to the full-texts of Open Access publications enrich the content of the data lake with links between research results and projects, author affiliations, subject classification, links to entries from domain-specific databases. Duplicated organisations and results are identified and merged together to obtain an open, trusted, public resource enabling explorations of the scholarly communication landscape like never before.

- Data provision + Data provision

diff --git a/docs/data-provision/enrichment/bulk-tagging.md b/docs/data-provision/enrichment-by-deduction-and-propagation/bulk-tagging.md similarity index 99% rename from docs/data-provision/enrichment/bulk-tagging.md rename to docs/data-provision/enrichment-by-deduction-and-propagation/bulk-tagging.md index 3f1f4ac..b13c767 100644 --- a/docs/data-provision/enrichment/bulk-tagging.md +++ b/docs/data-provision/enrichment-by-deduction-and-propagation/bulk-tagging.md @@ -1,5 +1,4 @@ - -# Bulk Tagging/Deduction +# Deduction The Deduction process (also known as “bulk tagging”) enriches each record with new information that can be derived from the existing property values. diff --git a/docs/data-provision/enrichment/propagation.md b/docs/data-provision/enrichment-by-deduction-and-propagation/propagation.md similarity index 100% rename from docs/data-provision/enrichment/propagation.md rename to docs/data-provision/enrichment-by-deduction-and-propagation/propagation.md diff --git a/docs/data-provision/enrichment/_category_.json b/docs/data-provision/enrichment-by-mining/_category_.json similarity index 100% rename from docs/data-provision/enrichment/_category_.json rename to docs/data-provision/enrichment-by-mining/_category_.json diff --git a/docs/data-provision/enrichment/acks.md b/docs/data-provision/enrichment-by-mining/acks.md similarity index 100% rename from docs/data-provision/enrichment/acks.md rename to docs/data-provision/enrichment-by-mining/acks.md diff --git a/docs/data-provision/enrichment/affiliation_matching.md b/docs/data-provision/enrichment-by-mining/affiliation_matching.md similarity index 100% rename from docs/data-provision/enrichment/affiliation_matching.md rename to docs/data-provision/enrichment-by-mining/affiliation_matching.md diff --git a/docs/data-provision/enrichment/citation_matching.md b/docs/data-provision/enrichment-by-mining/citation_matching.md similarity index 100% rename from docs/data-provision/enrichment/citation_matching.md rename 
to docs/data-provision/enrichment-by-mining/citation_matching.md diff --git a/docs/data-provision/enrichment/cites.md b/docs/data-provision/enrichment-by-mining/cites.md similarity index 100% rename from docs/data-provision/enrichment/cites.md rename to docs/data-provision/enrichment-by-mining/cites.md diff --git a/docs/data-provision/enrichment/classifies.md b/docs/data-provision/enrichment-by-mining/classifies.md similarity index 100% rename from docs/data-provision/enrichment/classifies.md rename to docs/data-provision/enrichment-by-mining/classifies.md diff --git a/docs/data-provision/enrichment/documents_similarity.md b/docs/data-provision/enrichment-by-mining/documents_similarity.md similarity index 100% rename from docs/data-provision/enrichment/documents_similarity.md rename to docs/data-provision/enrichment-by-mining/documents_similarity.md diff --git a/docs/data-provision/enrichment/img.png b/docs/data-provision/enrichment-by-mining/img.png similarity index 100% rename from docs/data-provision/enrichment/img.png rename to docs/data-provision/enrichment-by-mining/img.png diff --git a/docs/data-provision/enrichment/metadata_extraction.md b/docs/data-provision/enrichment-by-mining/metadata_extraction.md similarity index 100% rename from docs/data-provision/enrichment/metadata_extraction.md rename to docs/data-provision/enrichment-by-mining/metadata_extraction.md diff --git a/docs/data-provision/post-cleaning.md b/docs/data-provision/finalisation.md similarity index 96% rename from docs/data-provision/post-cleaning.md rename to docs/data-provision/finalisation.md index b223fb4..b08dff7 100644 --- a/docs/data-provision/post-cleaning.md +++ b/docs/data-provision/finalisation.md @@ -1,8 +1,4 @@ ---- -sidebar_position: 4 ---- - -# Post cleaning +# Finalisation At the very end of the processing pipeline, a step is dedicated to perform cleaning operations aimed at improving the overall quality of the data. 
The output of this final cleansing step is the final version of the OpenAIRE Graph. @@ -47,7 +43,7 @@ Bibliographic records that do not meet minimal requirements for being part of th Currently, the only criteria applied horizontally to the entire graph aims at excluding scientific results whose title is not meaningful for citation purposes. Then, different criteria are applied in the pre-processing of specific sub-collections: -* [Crossref filtering](/data-provision/aggregation/doiboost#crossref-filtering) +* [Crossref filtering](/data-provision/aggregation/non-compatible-sources/doiboost#crossref-filtering) ## Country cleaning diff --git a/docs/data-provision/indexing.md b/docs/data-provision/indexing.md index f5b2e21..ecc677c 100644 --- a/docs/data-provision/indexing.md +++ b/docs/data-provision/indexing.md @@ -1,7 +1,3 @@ ---- -sidebar_position: 5 ---- - # Indexing The final version of the OpenAIRE Graph is indexed on a Solr server that is used by the OpenAIRE portals (EXPLORE, CONNECT, PROVIDE) and APIs, the latter adopted by several third-party applications and organizations, such as: diff --git a/docs/data-provision/enrichment/impact-scores.md b/docs/data-provision/indicators-ingestion/impact-scores.md similarity index 99% rename from docs/data-provision/enrichment/impact-scores.md rename to docs/data-provision/indicators-ingestion/impact-scores.md index d3db939..7754a04 100644 --- a/docs/data-provision/enrichment/impact-scores.md +++ b/docs/data-provision/indicators-ingestion/impact-scores.md @@ -1,7 +1,3 @@ ---- -sidebar_position: 2 ---- - # Impact indicators This page summarises all calculated impact indicators, which are included into the [measure](/data-model/entities/other#measure) property. 
diff --git a/docs/data-provision/indicators-ingestion/usage-counts.md b/docs/data-provision/indicators-ingestion/usage-counts.md new file mode 100644 index 0000000..b0325c2 --- /dev/null +++ b/docs/data-provision/indicators-ingestion/usage-counts.md @@ -0,0 +1 @@ +# Usage counts \ No newline at end of file diff --git a/docs/data-provision/merge-by-id.md b/docs/data-provision/merge-by-id.md new file mode 100644 index 0000000..1b6bfa5 --- /dev/null +++ b/docs/data-provision/merge-by-id.md @@ -0,0 +1 @@ +# Merge by id \ No newline at end of file diff --git a/sidebars.js b/sidebars.js index aadca39..37abdad 100644 --- a/sidebars.js +++ b/sidebars.js @@ -66,7 +66,7 @@ const sidebars = { }, { type: 'category', - label: "Data provision", + label: "Graph production workflow", link: {type: 'doc', id: 'data-provision/data-provision'}, items: [ { @@ -74,12 +74,46 @@ const sidebars = { label: "Aggregation", link: {type: 'doc', id: 'data-provision/aggregation/aggregation'}, items: [ - { type: 'doc', id: 'data-provision/aggregation/doiboost', label: 'DOIBoost' }, - { type: 'doc', id: 'data-provision/aggregation/pubmed' }, - { type: 'doc', id: 'data-provision/aggregation/datacite' }, - { type: 'doc', id: 'data-provision/aggregation/ebi', label: 'EMBL-EBI' }, + { + type: 'doc', + label: "OpenAIRE compatible sources", + id: 'data-provision/aggregation/compatible-sources', + }, + { + type: 'category', + label: "Non-compatible sources", + link: { type: 'generated-index' }, + items: [ + { type: 'doc', id: 'data-provision/aggregation/non-compatible-sources/doiboost', label: 'DOIBoost' }, + { type: 'doc', id: 'data-provision/aggregation/non-compatible-sources/pubmed' }, + { type: 'doc', id: 'data-provision/aggregation/non-compatible-sources/datacite' }, + { type: 'doc', id: 'data-provision/aggregation/non-compatible-sources/ebi', label: 'EMBL-EBI' }, + ] + } ] }, + { + type: 'doc', + id: 'data-provision/merge-by-id' + }, + { + type: 'category', + label: "Enrichment by mining", + link: { 
+ type: 'generated-index', + description: 'The OpenAIRE Graph is enriched using the different Text and Data Mining (TDM) algorithms that are grouped in the following categories.' + }, + items: [ + { type: 'doc', id: 'data-provision/enrichment-by-mining/affiliation_matching' }, + { type: 'doc', id: 'data-provision/enrichment-by-mining/citation_matching' }, + { type: 'doc', id: 'data-provision/enrichment-by-mining/classifies' }, + { type: 'doc', id: 'data-provision/enrichment-by-mining/documents_similarity' }, + { type: 'doc', id: 'data-provision/enrichment-by-mining/acks' }, + { type: 'doc', id: 'data-provision/enrichment-by-mining/cites' }, + { type: 'doc', id: 'data-provision/enrichment-by-mining/metadata_extraction' }, + ] + }, + { type: 'doc', id: 'data-provision/cleaning' }, { type: 'category', label: "Deduplication", @@ -90,38 +124,32 @@ const sidebars = { ] }, { - type: 'category', - label: "Enrichment", - link: { - type: 'generated-index', - description: 'The OpenAIRE Graph is enriched using the different processes that we describe in this section.' + type: 'category', + label: "Enrichment by deduction & propagation", + link: { + type: 'generated-index' , + description: 'The OpenAIRE Graph is further enriched by the deduction and propagation processes described in this section.' 
+ }, items: [ - { - type: 'category', - label: "Mining", - link: { - type: 'generated-index', - description: 'The Text and Data Mining (TDM) algorithms used for enriching the OpenAIRE Graph are grouped in the following main categories:' - }, - items: [ - { type: 'doc', id: 'data-provision/enrichment/affiliation_matching' }, - { type: 'doc', id: 'data-provision/enrichment/citation_matching' }, - { type: 'doc', id: 'data-provision/enrichment/classifies' }, - { type: 'doc', id: 'data-provision/enrichment/documents_similarity' }, - { type: 'doc', id: 'data-provision/enrichment/acks' }, - - { type: 'doc', id: 'data-provision/enrichment/cites' }, - - { type: 'doc', id: 'data-provision/enrichment/metadata_extraction' }, - ] - }, - { type: 'doc', id: 'data-provision/enrichment/bulk-tagging' }, - { type: 'doc', id: 'data-provision/enrichment/propagation' }, - { type: 'doc', id: 'data-provision/enrichment/impact-scores' }, + { type: 'doc', id: 'data-provision/enrichment-by-deduction-and-propagation/bulk-tagging' }, + { type: 'doc', id: 'data-provision/enrichment-by-deduction-and-propagation/propagation' }, ] }, - { type: 'doc', id: 'data-provision/post-cleaning' }, + { + type: 'category', + label: "Indicators ingestion", + link: { + type: 'generated-index' , + description: 'In this step, the following types of indicators are ingested in the OpenAIRE Graph.' 
+ + }, + items: [ + { type: 'doc', id: 'data-provision/indicators-ingestion/impact-scores' }, + { type: 'doc', id: 'data-provision/indicators-ingestion/usage-counts' }, + ] + }, + { type: 'doc', id: 'data-provision/finalisation' }, { type: 'doc', id: 'data-provision/indexing' }, ] }, diff --git a/static/img/docs/architecture.old.png b/static/img/docs/architecture.old.png new file mode 100644 index 0000000..9e174db Binary files /dev/null and b/static/img/docs/architecture.old.png differ diff --git a/static/img/docs/architecture.png b/static/img/docs/architecture.png index 9e174db..c8c6e71 100644 Binary files a/static/img/docs/architecture.png and b/static/img/docs/architecture.png differ