From f743e6c71cad91588ef67afd1aae850ba0d3d0f1 Mon Sep 17 00:00:00 2001
From: Konstantinos Triantafyllou
Date: Fri, 9 Oct 2020 16:44:07 +0000
Subject: [PATCH] [Graph | Trunk]: Fix margins and paddings on about

git-svn-id: https://svn.driver.research-infrastructures.eu/driver/dnet40/modules/uoa-graph-portal/trunk@59547 d315682c-612b-4755-9ff5-7f18f6832af3
---
 src/app/about/about.component.html | 1104 ++++++++++++++--------
 1 file changed, 553 insertions(+), 551 deletions(-)

diff --git a/src/app/about/about.component.html b/src/app/about/about.component.html
index 74c9858..c504400 100644
--- a/src/app/about/about.component.html
+++ b/src/app/about/about.component.html
@@ -112,575 +112,577 @@
Aggregation

OpenAIRE collects metadata records from a variety of content providers, as described at https://www.openaire.eu/aggregation-and-content-provision-workflows.

OpenAIRE aggregates metadata records describing objects of the research life cycle from content providers that are compliant with the OpenAIRE guidelines and from entity registries (i.e. data sources offering authoritative lists of entities, such as OpenDOAR, re3data, DOAJ, and funder databases). After collection, metadata are transformed according to the OpenAIRE internal metadata model, which is used to generate the final OpenAIRE Research Graph that you can access from the OpenAIRE portal and the APIs.

The transformation process includes the application of cleaning functions whose goal is to ensure that values are harmonised according to a common format (e.g. dates as YYYY-MM-dd) and, whenever applicable, to a common controlled vocabulary. The controlled vocabularies used for cleansing are accessible at http://api.openaire.eu/vocabularies. Each vocabulary features a set of controlled terms, each with one code, one label, and a set of synonyms. If a synonym is found as a field value, the value is updated with the corresponding term. The OpenAIRE Research Graph is also extended with other relevant scholarly communication sources that are too big to be integrated via the "normal" aggregation mechanism: DOIBoost (which merges Crossref, ORCID, Microsoft Academic Graph, and Unpaywall) and ScholeXplorer, one of the Scholix hubs offering a large set of links between research literature and data.
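To make the cleaning step concrete, here is a minimal Python sketch of vocabulary-based cleansing under stated assumptions: the vocabulary excerpt, the field being cleaned and the helper names are invented for illustration; only the general mechanism (synonyms mapped to controlled terms, dates harmonised to YYYY-MM-dd) comes from the paragraph above.

from datetime import datetime

# Hypothetical excerpt of a controlled vocabulary: each term has a code,
# a label and a set of synonyms (cf. http://api.openaire.eu/vocabularies).
ACCESS_RIGHTS = {
    "OPEN": {"label": "Open Access",
             "synonyms": {"open", "openaccess", "info:eu-repo/semantics/openaccess"}},
    "RESTRICTED": {"label": "Restricted", "synonyms": {"restricted"}},
}

def clean_term(raw_value: str) -> str:
    """Replace a synonym with the label of its controlled term; keep unknown values."""
    needle = raw_value.strip().lower()
    for code, term in ACCESS_RIGHTS.items():
        if needle == term["label"].lower() or needle in term["synonyms"]:
            return term["label"]
    return raw_value  # not in the vocabulary: left untouched for later curation

def clean_date(raw_value: str):
    """Harmonise a date to the common YYYY-MM-dd format, if it can be parsed."""
    for fmt in ("%Y-%m-%d", "%d/%m/%Y", "%Y"):
        try:
            return datetime.strptime(raw_value.strip(), fmt).strftime("%Y-%m-%d")
        except ValueError:
            continue
    return None  # unparsable dates are dropped rather than guessed

print(clean_term("info:eu-repo/semantics/openAccess"))  # -> "Open Access"
print(clean_date("09/10/2020"))                         # -> "2020-10-09"

In the real workflow the vocabularies are of course fetched from the API above rather than hard-coded.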
Deduplication

Clustering is a common heuristic used to overcome the N x N complexity that would be required to match all pairs of objects in order to identify the equivalent ones. The challenge is to identify a clustering function that maximises the chance of comparing only records that may lead to a match, while minimising the number of records that are not matched although they are equivalent. Since the equivalence function is to some degree tolerant of minimal errors (e.g. swapped characters in the title, or small differences in spelling), this function must be neither too precise (e.g. a hash of the title) nor too flexible (e.g. random n-grams of the title). On the other hand, in some cases the equality of two records can only be determined by their PIDs (e.g. DOI), because the metadata properties are so different across versions that no clustering function would ever bring them into the same cluster. To meet these requirements, OpenAIRE clustering for products works with two functions:

• DOI: the function generates the DOI when this is provided as part of the record properties;
• Title-based function: the function generates a key that depends on (i) the number of significant words in the title (normalised, stemmed, etc.), (ii) modulo 10 of the number of characters of such words, and (iii) a string obtained as an alternation of the functions prefix(3) and suffix(3) (and vice versa) of the first 3 words (2 words if the title only has 2). For example, the title "Entity deduplication in big data graphs for scholarly communication" becomes "entity deduplication big data graphs scholarly communication", with the two keys "7.1entionbig" and "7.1itydedbig" (where 1 is modulo 10 of the 54 characters of the normalised title). A sketch of this key generation is given after this list.

To give an idea, this configuration generates around 77Mi blocks, which we limited to 200 records each (only 15K blocks are affected by the cut), and entails 260Bi matches. Matches in a block are performed using a "sliding window" set to 80 records: the records are sorted lexicographically on a normalised version of their titles, the first record is matched against the 80 following ones, then the second, and so on, for an N log N complexity.
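As an illustration only, here is a minimal Python sketch of a title-based clustering key in the spirit of the function described above; the normalisation, the toy stopword list and the exact key layout are assumptions, not the production implementation.

import re

def title_keys(title: str) -> list:
    """Toy clustering keys: word count, character count mod 10, and alternating
    prefix(3)/suffix(3) of the first words (illustrative, not OpenAIRE's code)."""
    words = re.sub(r"[^a-z0-9 ]", "", title.lower()).split()
    words = [w for w in words if w not in {"in", "for", "of", "the", "and"}]  # toy stopword list
    n_chars = sum(len(w) for w in words)
    head = words[:3] if len(words) >= 3 else words[:2]
    key1 = "".join(w[:3] if i % 2 == 0 else w[-3:] for i, w in enumerate(head))
    key2 = "".join(w[-3:] if i % 2 == 0 else w[:3] for i, w in enumerate(head))
    prefix = f"{len(words)}.{n_chars % 10}"
    return [prefix + key1, prefix + key2]

# Records sharing at least one key land in the same block and are then compared
# pairwise with the sliding window after lexicographic sorting of the titles.
print(title_keys("Entity deduplication in big data graphs for scholarly communication"))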
Deduplication

Once the clusters have been built, the algorithm proceeds with the comparisons. Comparisons are driven by a decision tree (sketched in code at the end of this section) that:

1. Tries to capture equivalence via PIDs: if two records share a PID, then they are equivalent.
2. Tries to capture difference:
   a. if the record titles contain different "numbers", then the records are different (this rule is debatable and should be fine-tuned);
   b. if the records contain a different number of authors, then they are different;
   c. note that different PIDs do not imply different records, as different versions may have different PIDs.
3. Measures equivalence:
   • The titles of the two records are normalised and compared for similarity by applying the Levenshtein distance algorithm. The algorithm returns a number in the range [0,1], where 0 means "very different" and 1 means "equal". If the distance is greater than or equal to 0.99, the two records are identified as duplicates.
   • Dates are not considered for equivalence matching, because different versions of the same record should be merged and may be published on different dates, e.g. the pre-print and the published version of an article.

Once the equivalence relationships between pairs of records are set, the groups of equivalent records are obtained (transitive closure, i.e. "mesh"). From each such group a new representative object is created, which inherits all properties from the merged records and keeps track of their provenance. The ID of the representative record is obtained by appending the prefix "dedup_" to the MD5 of the first ID (given their lexicographical ordering). A new, more stable function to generate the ID is under development, which exploits the DOI when one of the records to be merged is a Crossref or DataCite record.
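The following Python sketch shows, under stated assumptions, how such a decision tree and the merge-ID rule could look: the record fields and helper names are hypothetical, and SequenceMatcher merely stands in for a Levenshtein-based similarity; only the rules themselves (shared PID, differing numbers in titles, differing author counts, similarity of at least 0.99, "dedup_" plus the MD5 of the first ID) come from the text above.

import hashlib
import re
from difflib import SequenceMatcher  # stand-in for a Levenshtein-based similarity

def normalise(title: str) -> str:
    return re.sub(r"[^a-z0-9 ]", "", title.lower()).strip()

def similarity(a: str, b: str) -> float:
    # Placeholder similarity in [0,1]; the real system uses a Levenshtein-based measure.
    return SequenceMatcher(None, normalise(a), normalise(b)).ratio()

def same_record(r1: dict, r2: dict) -> bool:
    """Toy decision tree for two candidate records in the same block."""
    # 1. Equivalence via PIDs: a shared PID (e.g. DOI) means equivalent.
    if set(r1.get("pids", [])) & set(r2.get("pids", [])):
        return True
    # 2a. Different "numbers" in the titles -> different records.
    if re.findall(r"\d+", r1["title"]) != re.findall(r"\d+", r2["title"]):
        return False
    # 2b. Different number of authors -> different records.
    if len(r1.get("authors", [])) != len(r2.get("authors", [])):
        return False
    # 3. Measure equivalence on normalised titles; dates are ignored on purpose.
    return similarity(r1["title"], r2["title"]) >= 0.99

def representative_id(merged_ids: list) -> str:
    """ID of the representative: 'dedup_' + MD5 of the lexicographically first ID."""
    first = sorted(merged_ids)[0]
    return "dedup_" + hashlib.md5(first.encode("utf-8")).hexdigest()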
Enrichment

The aggregation processes run continuously and apply the controlled vocabularies as they are at a given moment in time. It can therefore happen that a vocabulary changes after the aggregation of one data source has finished, so that the aggregated content no longer reflects the current status of the controlled vocabularies.

In addition, the integration of ScholeXplorer and DOIBoost and some enrichment processes applied on the raw and on the de-duplicated graph may introduce values that do not comply with the current status of the OpenAIRE controlled vocabularies. For these reasons, a final cleansing step is included at the end of the workflow materialisation. The output of this final cleansing step is the final version of the OpenAIRE Research Graph.
Enrichment

The OpenAIRE Research Graph is enriched with links mined by OpenAIRE's full-text mining algorithms, which scan the plain texts of publications for funding information, references to datasets, software URIs, accession numbers of bio-entities, and EPO patent mentions. Custom mining modules also link research objects to specific research communities, initiatives and infrastructures. In addition, other inference modules provide content-based document classification, document similarity, citation matching, and author affiliation matching.

Project mining in OpenAIRE text-mines the full texts of publications in order to extract matches to funding project codes/IDs. The mining algorithm works by utilising (i) the grant identifier and (ii) the project acronym (if available) of each project. The algorithm (1) preprocesses/normalises the full texts using several functions, which depend on the characteristics of each funder (i.e. the format of the grant identifiers), such as stopword and/or punctuation removal, tokenisation, stemming, and conversion to lowercase; then (2) matches the grant identifiers as strings against the normalised text using database techniques; and (3) validates and cleans the results by looking at the context around each matched ID for relevant metadata and positive or negative words/phrases, in order to calculate a confidence value for each publication-to-project link. A confidence threshold is set to optimise high accuracy while minimising false positives, such as matches with page or report numbers, post/zip codes, parts of telephone numbers, DOIs or URLs, and accession numbers. The algorithm also applies rules for disambiguating results, as different funders can share identical project IDs; for example, grant number 633172 could refer to the H2020 project EuroMix but also to the Australian-funded NHMRC project "Brain activity (EEG) analysis and brain imaging techniques to measure the neurobiological effects of sleep apnea". Project mining works very well and was the first Text & Data Mining (TDM) service of OpenAIRE. Performance results vary from funder to funder, but precision is higher than 98% for all funders and 99.5% for EC projects. Recall is higher than 95% (99% for EC projects) when projects are properly acknowledged using project/grant IDs.
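A heavily simplified Python sketch of this idea follows; the grant-ID pattern, the context words and the scoring are invented for illustration and are far cruder than the per-funder rules described above.

import re

# Hypothetical pattern for H2020-style grant numbers (6 digits); real funders need per-funder rules.
GRANT_RE = re.compile(r"\b(\d{6})\b")
POSITIVE = {"grant", "project", "funded", "agreement", "h2020"}
NEGATIVE = {"page", "doi", "tel", "fax", "isbn"}

def find_grants(fulltext: str, known_grants: set, threshold: float = 0.5):
    """Return (grant_id, confidence) pairs whose context looks like an acknowledgement."""
    text = fulltext.lower()
    hits = []
    for m in GRANT_RE.finditer(text):
        grant = m.group(1)
        if grant not in known_grants:          # string match against the funder database
            continue
        context = text[max(0, m.start() - 60):m.end() + 60]
        score = 0.5
        score += 0.1 * sum(w in context for w in POSITIVE)
        score -= 0.2 * sum(w in context for w in NEGATIVE)
        if score >= threshold:                 # confidence threshold against false positives
            hits.append((grant, round(score, 2)))
    return hits

print(find_grants("This work was funded by the H2020 project EuroMix (grant agreement 633172).",
                  known_grants={"633172"}))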

Dataset extraction runs on the full texts of publications, as described in "High-pass text filtering for citation matching", TPDL 2017 [1]. In particular, we search for citations to datasets using their DOIs, titles and other metadata (i.e. dates, creator names, publishers, etc.). We extract the parts of the text that look like citations and search for datasets using database-join and pattern-matching techniques. Based on the experiments described in the paper, the precision of the dataset extraction module is 98.5% and its recall is 97.4%, although recall is probably overestimated because it does not take into account corruption that may occur during PDF-to-text extraction. It is calculated on the extracted full texts of small samples from PubMed and arXiv.

Software extraction also runs on the parts of the text that look like citations. We search the citations for links to software in open software repositories, specifically GitHub, SourceForge, Bitbucket and the Google Code archive. After that, we search for links that are included in Software Heritage (SH, https://www.softwareheritage.org) and return the permanent URL that SH provides for each software project. We also enrich this content with user names, titles and descriptions of the software projects using web mining techniques. Since software mining is based on URL matching, our precision is 100% (we return a software link only if we find it in the text, and there is no need to disambiguate). The recall rate is not calculable for this mining task: although we apply all the necessary normalisations to the URLs in order to overcome the usual issues (e.g. http or https, presence or absence of www, lower/upper case), we do not capture cases where a software product is mentioned by name rather than by a link to one of the supported software repositories.
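For illustration, a toy Python sketch of URL-based software-link extraction; the regular expression and the normalisation shown here cover only the basic cases mentioned above (scheme, www, case), and the Software Heritage resolution step is omitted.

import re

# Repositories named above; the pattern itself is an illustrative simplification.
REPO_RE = re.compile(
    r"https?://(?:www\.)?(github\.com|sourceforge\.net|bitbucket\.org|code\.google\.com)/[\w./-]+",
    re.IGNORECASE,
)

def software_links(citation_text: str) -> set:
    """Extract and lightly normalise links to open software repositories."""
    links = set()
    for m in REPO_RE.finditer(citation_text):
        url = m.group(0).lower().rstrip(".,)")
        url = url.replace("http://", "https://").replace("://www.", "://")
        links.add(url)
    return links

print(software_links("Code is available at http://www.github.com/example/tool."))
# -> {'https://github.com/example/tool'}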

For the extraction of bio-entities, we focus on Protein Data Bank (PDB) entries. We have downloaded the database of PDB codes and we update it regularly. We search the whole full text of each publication for references to PDB codes, and we apply disambiguation rules (e.g. there are PDB codes that are identical to antibody codes, among other issues) so that we return valid results. Current precision is 98%. Although it is risky to report recall rates, since these are usually overestimated, we have measured a recall of 98% on small samples of PubMed publications. Moreover, our technique is able to identify about 30% more links to proteins than those tagged in the PubMed XMLs.

Other text-mining modules include mining for links to EPO patents, and custom mining modules for linking research objects to specific research communities, initiatives and infrastructures, e.g. the COVID-19 mining module. Apart from text-mining modules, OpenAIRE also provides a document classification service that analyses the free text of publication abstracts. The purpose of the document classification module is to assign one or more predefined content classes to a scientific text. The taxonomies currently used in OpenAIRE are arXiv, MeSH (Medical Subject Headings), ACM and DDC (Dewey Decimal Classification, or Dewey Decimal System).
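As a toy illustration of abstract-based classification, here is a naive keyword-scoring sketch in Python; the keyword lists are invented and the real service relies on trained models over the arXiv, MeSH, ACM and DDC taxonomies.

# Invented keyword lists; a real classifier would be trained on labelled abstracts.
CLASS_KEYWORDS = {
    "cs.DL": {"library", "metadata", "repository", "citation"},
    "q-bio": {"protein", "genome", "cell", "rna"},
}

def classify_abstract(abstract: str, min_hits: int = 2) -> list:
    """Assign zero or more classes to an abstract by counting keyword hits."""
    tokens = set(abstract.lower().split())
    labels = []
    for label, keywords in CLASS_KEYWORDS.items():
        if len(tokens & keywords) >= min_hits:
            labels.append(label)
    return labels

print(classify_abstract("We present a citation matching service for repository metadata."))
# -> ['cs.DL']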

[1] Foufoulas, Y., Stamatogiannakis, L., Dimitropoulos, H., & Ioannidis, Y. (2017, September). High-Pass Text Filtering for Citation Matching. In International Conference on Theory and Practice of Digital Libraries (pp. 355-366). Springer, Cham.
Enrichment

The deduction process (also known as "bulk tagging") enriches each record with new information that can be derived from the existing property values. As of September 2020, three procedures are in place to relate a research product to a research initiative, infrastructure (RI) or community (RC), based on:

• subjects (2.7M results tagged)
• Zenodo community (16K results tagged)
• the data source it comes from (250K results tagged)

The lists of subjects, Zenodo communities and data sources used to enrich the products are defined by the managers of the community gateway or infrastructure monitoring dashboard associated with the RC/RI. A sketch of this kind of rule-based tagging is given below.
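A minimal Python sketch of such deduction rules follows; the community name, the subject list and the field names are invented for illustration.

# Hypothetical configuration provided by a community manager (illustrative values).
COMMUNITY_RULES = {
    "covid-19": {
        "subjects": {"sars-cov-2", "covid-19", "coronavirus"},
        "zenodo_communities": {"covid-19"},
        "datasources": {"opendoar____::1234"},
    },
}

def tag_communities(record: dict) -> set:
    """Attach community tags derived from subjects, Zenodo community or data source."""
    tags = set()
    subjects = {s.lower() for s in record.get("subjects", [])}
    for community, rule in COMMUNITY_RULES.items():
        if subjects & rule["subjects"]:
            tags.add(community)
        if set(record.get("zenodo_communities", [])) & rule["zenodo_communities"]:
            tags.add(community)
        if record.get("datasource") in rule["datasources"]:
            tags.add(community)
    return tags

print(tag_communities({"subjects": ["SARS-CoV-2", "epidemiology"], "datasource": "other"}))
# -> {'covid-19'}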
Enrichment

This process "propagates" properties and links from one product to another if there is a "strong" semantic relationship between the two. As of September 2020, the following procedures are in place (a sketch is given after the list):

• Propagation of the property "country" to results from institutional repositories: e.g. a publication collected from an institutional repository maintained by an Italian university is enriched with the property "country = IT".
• Propagation of links to projects: e.g. a publication linked to project P "is supplemented by" a dataset D; dataset D will get the link to project P. The relationships considered for this procedure are "isSupplementedBy" and "supplements".
• Propagation of the related community/infrastructure/initiative from organizations to products via affiliation relationships: e.g. a publication has an author affiliated with organization O; the manager of community gateway C has declared that the outputs of O are all relevant for community C; the publication is therefore tagged as relevant for C.
• Propagation of the related community/infrastructure/initiative to related products: e.g. a publication associated with community C is supplemented by a dataset D; dataset D will get the association with C. The relationships considered for this procedure are "isSupplementedBy" and "supplements".
• Propagation of ORCID identifiers to related products, if the products have the same authors: e.g. a publication has ORCID iDs for its authors and is supplemented by a dataset D that has the same authors as the publication; the authors of D are enriched with the ORCID iDs available in the publication. The relationships considered for this procedure are "isSupplementedBy" and "supplements".
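A minimal Python sketch of the propagation idea under stated assumptions: the record structure and field names below are illustrative, and only the "country from institutional repository" and "project link via isSupplementedBy/supplements" rules are taken from the list above.

SUPPLEMENT_RELS = {"isSupplementedBy", "supplements"}

def propagate(records, relations):
    """Toy propagation pass.
    records:   {id: {"country": ..., "projects": set(), "datasource_type": ..., "repo_country": ...}}
    relations: list of (source_id, relation_name, target_id) triples."""
    # Rule 1: results collected from an institutional repository inherit its country.
    for rec in records.values():
        if rec.get("datasource_type") == "institutional repository" and rec.get("repo_country"):
            rec["country"] = rec["repo_country"]
    # Rule 2: project links travel across isSupplementedBy/supplements relationships.
    for source_id, rel, target_id in relations:
        if rel in SUPPLEMENT_RELS:
            records[target_id]["projects"] |= records[source_id].get("projects", set())
    return records

records = {
    "pub1": {"projects": {"P"}, "datasource_type": "institutional repository", "repo_country": "IT"},
    "data1": {"projects": set()},
}
print(propagate(records, [("pub1", "isSupplementedBy", "data1")])["data1"]["projects"])  # -> {'P'}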
Post Cleaning

Lorem ipsum...

Indexing

The final version of the OpenAIRE Research Graph is indexed on a Solr server that is used by the OpenAIRE portals (EXPLORE, CONNECT, PROVIDE) and APIs, the latter adopted by several third-party applications and organizations, such as:

• EOSC: the OpenAIRE Research Graph APIs and portals will offer to the EOSC an Open Science Resource Catalogue, keeping an up-to-date map of all research results (publications, datasets, software), services, organizations, projects and funders in Europe and beyond.
• DSpace & EPrints repositories can install the OpenAIRE plugin to expose OpenAIRE-compliant metadata records via their OAI-PMH endpoint and to offer researchers the possibility to link their depositions to the funding project, by selecting it from the list of projects provided by OpenAIRE.
• The EC participant portal (Sygma - System for Grant Management) uses the OpenAIRE API in the "Continuous Reporting" section. Sygma automatically fetches from the OpenAIRE Search API the list of publications and datasets in the OpenAIRE Research Graph that are linked to the project. The user can select the research products from the list and easily compile the continuous reporting data of the project (an illustrative API call is sketched below).
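As an illustration of the third use case, a small Python sketch that asks the public OpenAIRE Search API for the publications linked to a grant. The endpoint is the Search API mentioned above; the parameter names are assumptions and should be checked against the current API documentation.

import requests

def publications_for_grant(grant_code: str, funder_shortname: str = "EC"):
    # Parameter names ("projectID", "funder", "format", "size") are assumptions to verify.
    response = requests.get(
        "http://api.openaire.eu/search/publications",
        params={"projectID": grant_code, "funder": funder_shortname, "format": "json", "size": 10},
        timeout=30,
    )
    response.raise_for_status()
    return response.json()

results = publications_for_grant("633172")  # EuroMix, used as an example earlier on this page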
    +
Stats Analysis

The OpenAIRE Research Graph is also processed by a pipeline that extracts the statistics and produces the charts for funders, research initiatives, infrastructures, and policy makers that you can see on MONITOR. Based on the information available in the graph, OpenAIRE provides a set of indicators for monitoring funding and research impact and the uptake of Open Science publishing practices, such as Open Access publishing of publications and datasets, the availability of interlinks between research products, the availability of post-print versions in institutional or thematic Open Access repositories, and so on.
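For illustration, a tiny Python sketch of one such indicator (the share of Open Access publications per funder); the record fields are hypothetical and the real statistics pipeline is of course far more elaborate.

from collections import defaultdict

def open_access_share(publications):
    """Percentage of Open Access publications per funder (illustrative field names)."""
    totals, open_counts = defaultdict(int), defaultdict(int)
    for pub in publications:
        for funder in pub.get("funders", []):
            totals[funder] += 1
            if pub.get("best_access_right") == "Open Access":
                open_counts[funder] += 1
    return {f: round(100.0 * open_counts[f] / totals[f], 1) for f in totals}

sample = [
    {"funders": ["EC"], "best_access_right": "Open Access"},
    {"funders": ["EC"], "best_access_right": "Restricted"},
]
print(open_access_share(sample))  # -> {'EC': 50.0}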
    References
@@ -713,7 +715,7 @@

      Infrastructure


      The OpenAIRE graph operates based on a vast variety of hardware and software. As of December 2019, the