import simrels

This commit is contained in:
Michele Artini 2020-07-22 09:59:24 +02:00
parent 334eb2b927
commit 94d2e7523f
4 changed files with 58 additions and 6 deletions

View File

@ -75,6 +75,10 @@ public abstract class AbstractDbApplication extends AbstractMigrationApplication
public static final String SOURCE_TYPE = "source_type"; public static final String SOURCE_TYPE = "source_type";
public static final String TARGET_TYPE = "target_type"; public static final String TARGET_TYPE = "target_type";
// Values set on the relations built by processOrgOrgSimRels:
// passed to setRelType, setSubRelType and setRelClass respectively.
private static final String ORG_ORG_RELTYPE = "organizationOrganization";
private static final String ORG_ORG_SUBRELTYPE = "dedup";
private static final String ORG_ORG_CLASS = "isSimilarTo";
private final DbClient dbClient; private final DbClient dbClient;
private final long lastUpdateTimestamp; private final long lastUpdateTimestamp;
@ -423,6 +427,42 @@ public abstract class AbstractDbApplication extends AbstractMigrationApplication
} }
} }
/**
 * Maps one row of the organization similarity query to a pair of symmetric relations.
 *
 * <p>The row is expected to expose the columns {@code id1}, {@code id2},
 * {@code collectedfromid} and {@code collectedfromname}, plus whatever columns
 * {@code prepareDataInfo} reads. Two relations are produced, one in each direction
 * (id1 -> id2 and id2 -> id1), sharing the same provenance and data info.
 *
 * @param rs result set positioned on the row to convert
 * @return a fixed-size list holding the two relations, direct one first
 * @throws RuntimeException wrapping any exception raised while reading the row
 */
public List<Oaf> processOrgOrgSimRels(final ResultSet rs) {
	try {
		final DataInfo info = prepareDataInfo(rs); // TODO
		// Both ends of an organization-organization relation are organizations, so both ids
		// must be built with the same entity prefix (20); using a different prefix for id2
		// would produce a target id that never matches an organization record.
		final String orgId1 = createOpenaireId(20, rs.getString("id1"), true);
		final String orgId2 = createOpenaireId(20, rs.getString("id2"), true);
		final List<KeyValue> collectedFrom = listKeyValues(
			createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));

		return Arrays.asList(
			newOrgOrgSimRel(orgId1, orgId2, collectedFrom, info),
			newOrgOrgSimRel(orgId2, orgId1, collectedFrom, info));
	} catch (final Exception e) {
		throw new RuntimeException(e);
	}
}

/** Builds a single organization similarity relation going from {@code source} to {@code target}. */
private Relation newOrgOrgSimRel(final String source,
	final String target,
	final List<KeyValue> collectedFrom,
	final DataInfo info) {
	final Relation rel = new Relation();
	rel.setRelType(ORG_ORG_RELTYPE);
	rel.setSubRelType(ORG_ORG_SUBRELTYPE);
	rel.setRelClass(ORG_ORG_CLASS);
	rel.setSource(source);
	rel.setTarget(target);
	rel.setCollectedfrom(collectedFrom);
	rel.setDataInfo(info);
	rel.setLastupdatetimestamp(lastUpdateTimestamp);
	return rel;
}
private List<Context> prepareContext(final String id, final DataInfo dataInfo) { private List<Context> prepareContext(final String id, final DataInfo dataInfo) {
final Context context = new Context(); final Context context = new Context();
context.setId(id); context.setId(id);

View File

@ -45,7 +45,7 @@ public class MigrateOpenOrgsApplication extends AbstractDbApplication {
mapper.execute("queryOrganizationsFromOpenOrgsDB.sql", mapper::processOrganization); mapper.execute("queryOrganizationsFromOpenOrgsDB.sql", mapper::processOrganization);
log.info("Processing simrels..."); log.info("Processing simrels...");
// smdbe.execute("querySimilarityFromOpenOrgsDB.sql", smdbe::xxxx); mapper.execute("querySimilarityFromOpenOrgsDB.sql", mapper::processOrgOrgSimRels);
log.info("All done."); log.info("All done.");
} }

View File

@ -60,7 +60,7 @@ SELECT
false AS ecsmevalidated, false AS ecsmevalidated,
false AS inferred, false AS inferred,
false AS deletedbyinference, false AS deletedbyinference,
0.88 AS trust, 0.5 AS trust,
'' AS inferenceprovenance, '' AS inferenceprovenance,
'openaire____::openorgs' AS collectedfromid, 'openaire____::openorgs' AS collectedfromid,
'OpenOrgs Database' AS collectedfromname, 'OpenOrgs Database' AS collectedfromname,

View File

@ -1,12 +1,24 @@
SELECT SELECT
local_id AS id1, local_id AS id1,
oa_original_id AS id2 oa_original_id AS id2,
FROM openaire_simrels WHERE reltype = 'is_similar' 'openaire____::openorgs' AS collectedfromid,
'OpenOrgs Database' AS collectedfromname,
false AS inferred,
false AS deletedbyinference,
0.99 AS trust,
'' AS inferenceprovenance
FROM oa_duplicates WHERE reltype = 'is_similar'
UNION ALL UNION ALL
SELECT SELECT
o.id AS id1, o.id AS id1,
'openorgsmesh'||substring(o.id, 13)||'-'||md5(n.name) AS id2 'openorgsmesh'||substring(o.id, 13)||'-'||md5(n.name) AS id2,
'openaire____::openorgs' AS collectedfromid,
'OpenOrgs Database' AS collectedfromname,
false AS inferred,
false AS deletedbyinference,
0.99 AS trust,
'' AS inferenceprovenance
FROM other_names n FROM other_names n
LEFT OUTER JOIN organizations o ON (n.id = o.id) LEFT OUTER JOIN organizations o ON (n.id = o.id)