From 94d2e7523fc57f6133819df6de3471c772292707 Mon Sep 17 00:00:00 2001
From: "michele.artini"
Date: Wed, 22 Jul 2020 09:59:24 +0200
Subject: [PATCH] import simrels

---
 .../oa/graph/raw/AbstractDbApplication.java   | 50 +++++++++++++++++++
 .../graph/raw/MigrateOpenOrgsApplication.java |  2 +-
 .../sql/queryOrganizationsFromOpenOrgsDB.sql  |  2 +-
 .../sql/querySimilarityFromOpenOrgsDB.sql     | 20 ++++++++--
 4 files changed, 68 insertions(+), 6 deletions(-)

diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractDbApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractDbApplication.java
index 8e2940b6a..bdfc8fec7 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractDbApplication.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/AbstractDbApplication.java
@@ -75,6 +75,10 @@ public abstract class AbstractDbApplication extends AbstractMigrationApplication
 	public static final String SOURCE_TYPE = "source_type";
 	public static final String TARGET_TYPE = "target_type";
 
+	private static final String ORG_ORG_RELTYPE = "organizationOrganization";
+	private static final String ORG_ORG_SUBRELTYPE = "dedup";
+	private static final String ORG_ORG_CLASS = "isSimilarTo";
+
 	private final DbClient dbClient;
 
 	private final long lastUpdateTimestamp;
@@ -423,6 +427,52 @@ public abstract class AbstractDbApplication extends AbstractMigrationApplication
 		}
 	}
 
+	/**
+	 * Maps the current row of the OpenOrgs similarity query to a symmetric pair of
+	 * organization-to-organization relations (id1 -> id2 and id2 -> id1).
+	 *
+	 * @param rs result set already positioned on the row to convert; the columns id1, id2,
+	 *           collectedfromid and collectedfromname are read here, and prepareDataInfo(rs) is invoked on it
+	 * @return a two-element list holding the direct and the inverse relation
+	 * @throws RuntimeException wrapping any exception raised while reading the row
+	 */
+	public List processOrgOrgSimRels(final ResultSet rs) {
+		try {
+			final DataInfo info = prepareDataInfo(rs); // TODO
+
+			// both ends are organizations, so id1 and id2 take the same entity prefix
+			final String orgId1 = createOpenaireId(20, rs.getString("id1"), true);
+			final String orgId2 = createOpenaireId(20, rs.getString("id2"), true);
+
+			final List collectedFrom = listKeyValues(
+				createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
+
+			final Relation r1 = new Relation();
+			r1.setRelType(ORG_ORG_RELTYPE);
+			r1.setSubRelType(ORG_ORG_SUBRELTYPE);
+			r1.setRelClass(ORG_ORG_CLASS);
+			r1.setSource(orgId1);
+			r1.setTarget(orgId2);
+			r1.setCollectedfrom(collectedFrom);
+			r1.setDataInfo(info);
+			r1.setLastupdatetimestamp(lastUpdateTimestamp);
+
+			final Relation r2 = new Relation();
+			r2.setRelType(ORG_ORG_RELTYPE);
+			r2.setSubRelType(ORG_ORG_SUBRELTYPE);
+			r2.setRelClass(ORG_ORG_CLASS);
+			r2.setSource(orgId2);
+			r2.setTarget(orgId1);
+			r2.setCollectedfrom(collectedFrom);
+			r2.setDataInfo(info);
+			r2.setLastupdatetimestamp(lastUpdateTimestamp);
+
+			return Arrays.asList(r1, r2);
+		} catch (final Exception e) {
+			throw new RuntimeException(e);
+		}
+	}
+
 	private List prepareContext(final String id, final DataInfo dataInfo) {
 		final Context context = new Context();
 		context.setId(id);
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateOpenOrgsApplication.java b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateOpenOrgsApplication.java
index a199da2d5..d4d1262d5 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateOpenOrgsApplication.java
+++ b/dhp-workflows/dhp-graph-mapper/src/main/java/eu/dnetlib/dhp/oa/graph/raw/MigrateOpenOrgsApplication.java
@@ -45,7 +45,7 @@ public class MigrateOpenOrgsApplication extends AbstractDbApplication {
 			mapper.execute("queryOrganizationsFromOpenOrgsDB.sql", mapper::processOrganization);
 
 			log.info("Processing simrels...");
-			// smdbe.execute("querySimilarityFromOpenOrgsDB.sql", smdbe::xxxx);
+			mapper.execute("querySimilarityFromOpenOrgsDB.sql", mapper::processOrgOrgSimRels);
 
 			log.info("All done.");
 		}
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizationsFromOpenOrgsDB.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizationsFromOpenOrgsDB.sql
index 93eb4c128..88efa3782 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizationsFromOpenOrgsDB.sql
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOrganizationsFromOpenOrgsDB.sql
@@ -60,7 +60,7 @@ SELECT
 	false AS ecsmevalidated,
 	false AS inferred,
 	false AS deletedbyinference,
-	0.88 AS trust,
+	0.5 AS trust,
 	'' AS inferenceprovenance,
 	'openaire____::openorgs' AS collectedfromid,
 	'OpenOrgs Database' AS collectedfromname,
diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/querySimilarityFromOpenOrgsDB.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/querySimilarityFromOpenOrgsDB.sql
index 63df41e09..464baf284 100644
--- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/querySimilarityFromOpenOrgsDB.sql
+++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/querySimilarityFromOpenOrgsDB.sql
@@ -1,12 +1,24 @@
 SELECT
-	local_id AS id1,
-	oa_original_id AS id2
-FROM openaire_simrels WHERE reltype = 'is_similar'
+	local_id AS id1,
+	oa_original_id AS id2,
+	'openaire____::openorgs' AS collectedfromid,
+	'OpenOrgs Database' AS collectedfromname,
+	false AS inferred,
+	false AS deletedbyinference,
+	0.99 AS trust,
+	'' AS inferenceprovenance
+FROM oa_duplicates WHERE reltype = 'is_similar'
 
 UNION ALL
 
 SELECT
 	o.id AS id1,
-	'openorgsmesh'||substring(o.id, 13)||'-'||md5(n.name) AS id2
+	'openorgsmesh'||substring(o.id, 13)||'-'||md5(n.name) AS id2,
+	'openaire____::openorgs' AS collectedfromid,
+	'OpenOrgs Database' AS collectedfromname,
+	false AS inferred,
+	false AS deletedbyinference,
+	0.99 AS trust,
+	'' AS inferenceprovenance
 FROM other_names n
 	LEFT OUTER JOIN organizations o ON (n.id = o.id)