diff --git a/docs/assets/openaire-badge-1.png b/docs/assets/badges/openaire-badge-1.png similarity index 100% rename from docs/assets/openaire-badge-1.png rename to docs/assets/badges/openaire-badge-1.png diff --git a/docs/assets/openaire-badge-1.zip b/docs/assets/badges/openaire-badge-1.zip similarity index 100% rename from docs/assets/openaire-badge-1.zip rename to docs/assets/badges/openaire-badge-1.zip diff --git a/docs/assets/openaire-badge-2.png b/docs/assets/badges/openaire-badge-2.png similarity index 100% rename from docs/assets/openaire-badge-2.png rename to docs/assets/badges/openaire-badge-2.png diff --git a/docs/assets/openaire-badge-2.zip b/docs/assets/badges/openaire-badge-2.zip similarity index 100% rename from docs/assets/openaire-badge-2.zip rename to docs/assets/badges/openaire-badge-2.zip diff --git a/docs/assets/openaire-badge-3.png b/docs/assets/badges/openaire-badge-3.png similarity index 100% rename from docs/assets/openaire-badge-3.png rename to docs/assets/badges/openaire-badge-3.png diff --git a/docs/assets/openaire-badge-3.zip b/docs/assets/badges/openaire-badge-3.zip similarity index 100% rename from docs/assets/openaire-badge-3.zip rename to docs/assets/badges/openaire-badge-3.zip diff --git a/static/img/docs/aggregation.png b/docs/assets/img/aggregation.png similarity index 100% rename from static/img/docs/aggregation.png rename to docs/assets/img/aggregation.png diff --git a/docs/assets/img/architecture.png b/docs/assets/img/architecture.png new file mode 100644 index 0000000..ad920fb Binary files /dev/null and b/docs/assets/img/architecture.png differ diff --git a/static/img/docs/data-model.png b/docs/assets/img/data-model.png similarity index 100% rename from static/img/docs/data-model.png rename to docs/assets/img/data-model.png diff --git a/static/img/docs/decisiontree-dataset-orp.png b/docs/assets/img/decisiontree-dataset-orp.png similarity index 100% rename from static/img/docs/decisiontree-dataset-orp.png rename to docs/assets/img/decisiontree-dataset-orp.png diff --git a/static/img/docs/decisiontree-organization.png b/docs/assets/img/decisiontree-organization.png similarity index 100% rename from static/img/docs/decisiontree-organization.png rename to docs/assets/img/decisiontree-organization.png diff --git a/static/img/docs/decisiontree-publication.png b/docs/assets/img/decisiontree-publication.png similarity index 100% rename from static/img/docs/decisiontree-publication.png rename to docs/assets/img/decisiontree-publication.png diff --git a/static/img/docs/decisiontree-software.png b/docs/assets/img/decisiontree-software.png similarity index 100% rename from static/img/docs/decisiontree-software.png rename to docs/assets/img/decisiontree-software.png diff --git a/static/img/docs/dedup-results.png b/docs/assets/img/dedup-results.png similarity index 100% rename from static/img/docs/dedup-results.png rename to docs/assets/img/dedup-results.png diff --git a/static/img/docs/deduplication-workflow.png b/docs/assets/img/deduplication-workflow.png similarity index 100% rename from static/img/docs/deduplication-workflow.png rename to docs/assets/img/deduplication-workflow.png diff --git a/static/img/docs/enrichment/bulktagging_datasource.png b/docs/assets/img/enrichment/bulktagging_datasource.png similarity index 100% rename from static/img/docs/enrichment/bulktagging_datasource.png rename to docs/assets/img/enrichment/bulktagging_datasource.png diff --git a/static/img/docs/enrichment/bulktagging_selconstraints.png b/docs/assets/img/enrichment/bulktagging_selconstraints.png similarity index 100% rename from static/img/docs/enrichment/bulktagging_selconstraints.png rename to docs/assets/img/enrichment/bulktagging_selconstraints.png diff --git a/static/img/docs/enrichment/bulktagging_subject.png b/docs/assets/img/enrichment/bulktagging_subject.png similarity index 100% rename from static/img/docs/enrichment/bulktagging_subject.png rename to docs/assets/img/enrichment/bulktagging_subject.png diff --git a/static/img/docs/enrichment/bulktagging_zenodo.png b/docs/assets/img/enrichment/bulktagging_zenodo.png similarity index 100% rename from static/img/docs/enrichment/bulktagging_zenodo.png rename to docs/assets/img/enrichment/bulktagging_zenodo.png diff --git a/static/img/docs/enrichment/organization_tree.png b/docs/assets/img/enrichment/organization_tree.png similarity index 100% rename from static/img/docs/enrichment/organization_tree.png rename to docs/assets/img/enrichment/organization_tree.png diff --git a/static/img/docs/enrichment/propagation_affiliationistrepo.png b/docs/assets/img/enrichment/propagation_affiliationistrepo.png similarity index 100% rename from static/img/docs/enrichment/propagation_affiliationistrepo.png rename to docs/assets/img/enrichment/propagation_affiliationistrepo.png diff --git a/static/img/docs/enrichment/propagation_country.png b/docs/assets/img/enrichment/propagation_country.png similarity index 100% rename from static/img/docs/enrichment/propagation_country.png rename to docs/assets/img/enrichment/propagation_country.png diff --git a/static/img/docs/enrichment/propagation_orcid.png b/docs/assets/img/enrichment/propagation_orcid.png similarity index 100% rename from static/img/docs/enrichment/propagation_orcid.png rename to docs/assets/img/enrichment/propagation_orcid.png diff --git a/static/img/docs/enrichment/propagation_organizationsemrel.png b/docs/assets/img/enrichment/propagation_organizationsemrel.png similarity index 100% rename from static/img/docs/enrichment/propagation_organizationsemrel.png rename to docs/assets/img/enrichment/propagation_organizationsemrel.png diff --git a/static/img/docs/enrichment/propagation_resulttocommunitythroughorganization.png b/docs/assets/img/enrichment/propagation_resulttocommunitythroughorganization.png similarity index 100% rename from static/img/docs/enrichment/propagation_resulttocommunitythroughorganization.png rename to docs/assets/img/enrichment/propagation_resulttocommunitythroughorganization.png diff --git a/static/img/docs/enrichment/propagation_resulttocommunitythroughsemrel.png b/docs/assets/img/enrichment/propagation_resulttocommunitythroughsemrel.png similarity index 100% rename from static/img/docs/enrichment/propagation_resulttocommunitythroughsemrel.png rename to docs/assets/img/enrichment/propagation_resulttocommunitythroughsemrel.png diff --git a/static/img/docs/enrichment/propagation_resulttoproject.png b/docs/assets/img/enrichment/propagation_resulttoproject.png similarity index 100% rename from static/img/docs/enrichment/propagation_resulttoproject.png rename to docs/assets/img/enrichment/propagation_resulttoproject.png diff --git a/docs/data-model/data-model.md b/docs/data-model/data-model.md index 50a6a52..d8cbe2e 100644 --- a/docs/data-model/data-model.md +++ b/docs/data-model/data-model.md @@ -5,7 +5,7 @@ The OpenAIRE Graph comprises several types of [entities](../category/entities) a The latest version of the JSON schema can be found on the [Downloads](../downloads/full-graph) section.
- +
The figure above, presents the graph's data model. diff --git a/docs/data-provision/aggregation/aggregation.md b/docs/data-provision/aggregation/aggregation.md index 037d98e..ac966fc 100644 --- a/docs/data-provision/aggregation/aggregation.md +++ b/docs/data-provision/aggregation/aggregation.md @@ -14,7 +14,7 @@ The transformation process includes the application of cleaning functions whose In addition, the OpenAIRE Graph is extended with other relevant scholarly communication sources that need special handling, either because they do not strictly follow the OpenAIRE Guidelines or due to the vast amount of data of data they offer (e.g. DOIBoost, that merges Crossref, ORCID, Microsoft Academic Graph, and Unpaywall).- +
The OpenAIRE aggregation system collects information about objects of the research life-cycle compliant to the [OpenAIRE acquisition policy](https://www.openaire.eu/content-acquisition-policy) from [different types of data sources](https://explore.openaire.eu/search/find/dataproviders): diff --git a/docs/data-provision/data-provision.md b/docs/data-provision/data-provision.md index 95cd3bc..27114ec 100644 --- a/docs/data-provision/data-provision.md +++ b/docs/data-provision/data-provision.md @@ -3,5 +3,6 @@ OpenAIRE collects metadata records from more than 70K scholarly communication sources from all over the world, including Open Access institutional repositories, data archives, journals. All the metadata records (i.e. descriptions of research products) are put together in a data lake, together with records from Crossref, Unpaywall, ORCID, Grid.ac, and information about projects provided by national and international funders. Dedicated inference algorithms applied to metadata and to the full-texts of Open Access publications enrich the content of the data lake with links between research results and projects, author affiliations, subject classification, links to entries from domain-specific databases. Duplicated organisations and results are identified and merged together to obtain an open, trusted, public resource enabling explorations of the scholarly communication landscape like never before.- +
+ diff --git a/docs/data-provision/deduction-and-propagation/bulk-tagging.md b/docs/data-provision/deduction-and-propagation/bulk-tagging.md index b13c767..f52188e 100644 --- a/docs/data-provision/deduction-and-propagation/bulk-tagging.md +++ b/docs/data-provision/deduction-and-propagation/bulk-tagging.md @@ -8,13 +8,13 @@ As of November 2022, three procedures are in place to relate a research product * subjects: it is possible to specify a list of subjects that are relevant for the RC/RI. Every time one of the subjects is found among the subjects of a result, the result is linked to the RC/RI.- +
* data sources: it is possible to list a set of data sources relevant for the RC/RI. All the results collected from these data sources will be linked to the RC/RI- +
When only some results collected from a datasource are relevant for the RC/RI, it is possible to specify a set of selection constraints (SC) that have to be verified before linking the result to the @@ -23,14 +23,14 @@ while the set of condition can be among V={contains, equals, not_contain A possible selection criteria can be: “All the products whose contributor contains DARIAH “- +
* Zenodo community: it is possible to list a set of Zenodo communities relevant for the RC/RI. All the products collected from the listed Zenodo communities are linked to the RC/RI- +
diff --git a/docs/data-provision/deduction-and-propagation/propagation.md b/docs/data-provision/deduction-and-propagation/propagation.md index 604dafa..79f0902 100644 --- a/docs/data-provision/deduction-and-propagation/propagation.md +++ b/docs/data-provision/deduction-and-propagation/propagation.md @@ -7,38 +7,39 @@ As of November 2022, the following procedures are in place: * Country propagation: updates the property “country” of a results. This happens when the result is collected from an institutional datasource or when the datasource hosting the result is inserted in a whitelist. For all the results whose hosting datasource verifies one of the conditions above, the country of the organization providing the datasource is added to the country of the result: e.g. publication collected from an institutional repository maintained by an italian university will be enriched with the property “country = IT”.- +
* Project propagation: adds a "isProducedBy" relationship (and its inverse) between a Project P and Result R1, if R1 has a strong semantic relationship with another Result R2 and P produces R2: e.g. publication linked to project P “is supplemented by” a dataset D. Dataset D will get the link to project P. The relationships considered for this procedure are “isSupplementedBy” and “isSupplementTo”.- +
* Result to RC/RI through organization propagation. The manager of the RC/RI can specify a set of organizations whose product are relevant for the community. Each result having such a relation of affiliation with at least one organization relevant for the RC/RI will be linked to it.- +
* Result to RC/RI through semantic relation: extends the set of products linked to a RC/RI by exploiting strong semantic relationships between the results; e.g. if a result R1 is associated to the community C and is supplemented by a result R2 then the result R2 will be linked to the community. The relationships considered for this procedure are “isSupplementedBy” and “supplements”.- +
* ORCID identifiers to result through semantic relation. This propagation enriches the results by adding ORCID identifiers to authors. The added ORCID will be marked as "potential" since they have been inserted through propagation. The process considers the set of overlapping authors between results (R1 and R2) linked with a strong semantic relationship (IsSupplementedBy, IsSupplementTo). For each author A in the overlapping set, if R1 provides the ORCID value for A and R2 does not, then the author A in R2 will be enriched with the information of the ORCID found in R1.- +
* affiliation to organization through institutional repository. This propagation adds one "hasAuthorInstitution" relationship (and its inverse) between a Result R and Organization O, if R was collected from a datasource D with type institutional repository, and D was provided by O.- +
* affiliation to organization through semantic relation. This propagation adds one "hasAuthorInstitution" relationship (and its inverse) between a @@ -46,9 +47,9 @@ Result R and an Organization O, if R has an affiliation relation with an organization O1 that is in relation "isChildOf" with O.- +
The algorithm exploits only the organization leaves that are in a "IsChildOf" relation with another organization. So far one single step is done- +
\ No newline at end of file diff --git a/docs/data-provision/deduplication/deduplication.md b/docs/data-provision/deduplication/deduplication.md index 53d6906..8fb118a 100644 --- a/docs/data-provision/deduplication/deduplication.md +++ b/docs/data-provision/deduplication/deduplication.md @@ -10,7 +10,7 @@ The deduplication process can be divided into three different phases: * Duplicates grouping (transitive closure)- +
### Candidate identification (clustering) diff --git a/docs/data-provision/deduplication/organizations.md b/docs/data-provision/deduplication/organizations.md index a0b029b..c2c57e1 100644 --- a/docs/data-provision/deduplication/organizations.md +++ b/docs/data-provision/deduplication/organizations.md @@ -43,7 +43,7 @@ The comparison goes through the following decision tree: 5. *legalname check*: comparison of the normalized `legalnames` with the `Jaro-Winkler` distance to determine if it is higher than `0.9`. If so, a similarity relation is drawn. Otherwise, no similarity relation is drawn.- +
[//]: # (Link to the image: https://docs.google.com/drawings/d/1YKInGGtHu09QG4pT2gRLEum4LxU82d4nKkvGNvRQmrg/edit?usp=sharing) diff --git a/docs/data-provision/deduplication/research-products.md b/docs/data-provision/deduplication/research-products.md index 3000e24..4d68c25 100644 --- a/docs/data-provision/deduplication/research-products.md +++ b/docs/data-provision/deduplication/research-products.md @@ -34,7 +34,7 @@ The comparison goes through different stages: 5. *strong check*: comparison composed by three substages involving the (i) comparison of the author list sizes and the version of the record to determine if they are coherent, (ii) comparison of the record titles with the Levenshtein distance to determine if it is higher than 0.99, (iii) "smart" comparison of the author lists to check if common authors are more than 60%.- +
[//]: # (Link to the image: https://docs.google.com/drawings/d/19SIilTp1vukw6STMZuPMdc0pv0ODYCiOxP7OU3iPWK8/edit?usp=sharing) @@ -47,7 +47,7 @@ The comparison goes through different stages: 3. *strong check*: comparison of the record titles with Levenshtein distance. If the measure is above 0.99, then the similarity relation is drawn- +
[//]: # (Link to the image: https://docs.google.com/drawings/d/19gd1-GTOEEo6awMObGRkYFhpAlO_38mfbDFFX0HAkuo/edit?usp=sharing) @@ -57,7 +57,7 @@ For each pair of datasets or other types of research products in a cluster the s The decision tree is almost identical to the publication decision tree, with the only exception of the *instance type check* stage. Since such type of record does not have a relatable instance type, the check is not performed and the decision tree node is skipped.- +
[//]: # (Link to the image: https://docs.google.com/drawings/d/1uBa7Bw2KwBRDUYIfyRr_Keol7UOeyvMNN7MPXYLg4qw/edit?usp=sharing) diff --git a/docs/downloads/full-graph.md b/docs/downloads/full-graph.md index 97ddbfa..4b338b2 100644 --- a/docs/downloads/full-graph.md +++ b/docs/downloads/full-graph.md @@ -31,18 +31,18 @@ Also consider adding one of the following badges to your service with the approp diff --git a/static/bibtex/OpenAIRE_Research_Graph_dump.bib b/static/bibtex/OpenAIRE_Research_Graph_dump.bib index 2bd491d..2e8f5d3 100644 --- a/static/bibtex/OpenAIRE_Research_Graph_dump.bib +++ b/static/bibtex/OpenAIRE_Research_Graph_dump.bib @@ -20,7 +20,7 @@ Manola, Natalia and Principe, Pedro}, title = {OpenAIRE Research Graph Dump}, - month = jun, + month = Jun, year = 2022, note = {{A new version of this dataset is published every 6 months. The content available on the OpenAIRE diff --git a/static/img/docs/architecture.old.png b/static/img/docs/architecture.old.png deleted file mode 100644 index 9e174db..0000000 Binary files a/static/img/docs/architecture.old.png and /dev/null differ diff --git a/static/img/docs/architecture.png b/static/img/docs/architecture.png deleted file mode 100644 index c8c6e71..0000000 Binary files a/static/img/docs/architecture.png and /dev/null differ