import simrels

This commit is contained in:
Michele Artini 2020-07-22 09:59:24 +02:00
parent 334eb2b927
commit 94d2e7523f
4 changed files with 58 additions and 6 deletions

View File

@ -75,6 +75,10 @@ public abstract class AbstractDbApplication extends AbstractMigrationApplication
// Public string keys "source_type" / "target_type"; their consumers are not visible in this
// excerpt (presumably column or parameter names -- verify against callers).
public static final String SOURCE_TYPE = "source_type";
public static final String TARGET_TYPE = "target_type";
// Semantics of the organization-to-organization similarity relations: these three values are
// written to relType, subRelType and relClass of every Relation built by processOrgOrgSimRels.
private static final String ORG_ORG_RELTYPE = "organizationOrganization";
private static final String ORG_ORG_SUBRELTYPE = "dedup";
private static final String ORG_ORG_CLASS = "isSimilarTo";
// Database client; its usage is outside this excerpt.
private final DbClient dbClient;
// Timestamp copied into the lastupdatetimestamp field of the relations produced by this class.
private final long lastUpdateTimestamp;
@ -423,6 +427,42 @@ public abstract class AbstractDbApplication extends AbstractMigrationApplication
}
}
/**
 * Converts the current row of the organization-similarity query into a pair of symmetric
 * relations ({@code id1 -> id2} and {@code id2 -> id1}).
 *
 * <p>The row is read through the columns {@code id1}, {@code id2}, {@code collectedfromid} and
 * {@code collectedfromname}, plus whatever columns {@code prepareDataInfo} consumes.
 *
 * @param rs result set positioned on the row to convert
 * @return a two-element list holding the direct relation followed by its inverse
 * @throws RuntimeException wrapping any exception raised while reading the row or building ids
 */
public List<Oaf> processOrgOrgSimRels(final ResultSet rs) {
    try {
        final DataInfo info = prepareDataInfo(rs); // TODO (marker carried over from the original code)

        // Both endpoints of an organization-organization relation are organizations, so both
        // ids must be built with the same entity prefix (20). The second id was previously
        // built with 40, which would yield a target id that cannot match any organization.
        // NOTE(review): 40 is understood to be the project prefix -- confirm against the
        // identifier convention used by createOpenaireId.
        final String orgId1 = createOpenaireId(20, rs.getString("id1"), true);
        final String orgId2 = createOpenaireId(20, rs.getString("id2"), true);

        final List<KeyValue> collectedFrom = listKeyValues(
            createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));

        // The similarity is symmetric: emit one relation per direction, sharing provenance.
        return Arrays.asList(
            newOrgOrgSimRel(orgId1, orgId2, collectedFrom, info),
            newOrgOrgSimRel(orgId2, orgId1, collectedFrom, info));
    } catch (final Exception e) {
        throw new RuntimeException(e);
    }
}

/** Builds a single directed organization similarity relation from {@code source} to {@code target}. */
private Relation newOrgOrgSimRel(
    final String source,
    final String target,
    final List<KeyValue> collectedFrom,
    final DataInfo info) {
    final Relation rel = new Relation();
    rel.setRelType(ORG_ORG_RELTYPE);
    rel.setSubRelType(ORG_ORG_SUBRELTYPE);
    rel.setRelClass(ORG_ORG_CLASS);
    rel.setSource(source);
    rel.setTarget(target);
    rel.setCollectedfrom(collectedFrom);
    rel.setDataInfo(info);
    rel.setLastupdatetimestamp(lastUpdateTimestamp);
    return rel;
}
private List<Context> prepareContext(final String id, final DataInfo dataInfo) {
final Context context = new Context();
context.setId(id);

View File

@ -45,7 +45,7 @@ public class MigrateOpenOrgsApplication extends AbstractDbApplication {
mapper.execute("queryOrganizationsFromOpenOrgsDB.sql", mapper::processOrganization);
log.info("Processing simrels...");
// smdbe.execute("querySimilarityFromOpenOrgsDB.sql", smdbe::xxxx);
mapper.execute("querySimilarityFromOpenOrgsDB.sql", mapper::processOrgOrgSimRels);
log.info("All done.");
}

View File

@ -60,7 +60,7 @@ SELECT
false AS ecsmevalidated,
false AS inferred,
false AS deletedbyinference,
0.88 AS trust,
0.5 AS trust,
'' AS inferenceprovenance,
'openaire____::openorgs' AS collectedfromid,
'OpenOrgs Database' AS collectedfromname,

View File

@ -1,12 +1,24 @@
SELECT
local_id AS id1,
oa_original_id AS id2
FROM openaire_simrels WHERE reltype = 'is_similar'
local_id AS id1,
oa_original_id AS id2,
'openaire____::openorgs' AS collectedfromid,
'OpenOrgs Database' AS collectedfromname,
false AS inferred,
false AS deletedbyinference,
0.99 AS trust,
'' AS inferenceprovenance
FROM oa_duplicates WHERE reltype = 'is_similar'
UNION ALL
SELECT
o.id AS id1,
'openorgsmesh'||substring(o.id, 13)||'-'||md5(n.name) AS id2
'openorgsmesh'||substring(o.id, 13)||'-'||md5(n.name) AS id2,
'openaire____::openorgs' AS collectedfromid,
'OpenOrgs Database' AS collectedfromname,
false AS inferred,
false AS deletedbyinference,
0.99 AS trust,
'' AS inferenceprovenance
FROM other_names n
LEFT OUTER JOIN organizations o ON (n.id = o.id)