EOSC Services - removed fields from mapping, testing preparation

This commit is contained in:
Claudio Atzori 2022-05-02 15:52:33 +02:00
parent a8c51f6f16
commit b6a7ff3a99
6 changed files with 68 additions and 53 deletions

View File

@ -148,10 +148,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
log.info("Processing Organizations...");
smdbe.execute("queryOrganizations.sql", smdbe::processOrganization, verifyNamespacePrefix);
log.info("Processing relationsNoRemoval ds <-> orgs ...");
log.info("Processing relations services <-> orgs ...");
smdbe
.execute(
"queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization,
"queryServiceOrganization.sql", smdbe::processServiceOrganization,
verifyNamespacePrefix);
log.info("Processing projects <-> orgs ...");
@ -268,13 +268,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
ds.setOdpolicies(field(rs.getString("odpolicies"), info));
ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info));
ds.setLanguages(listValues(rs.getArray("languages")));
ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info));
ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info));
ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info));
ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info));
ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info));
ds.setDataprovider(field(rs.getBoolean("dataprovider"), info));
ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info));
ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info));
ds.setDatauploadtype(field(rs.getString("datauploadtype"), info));
ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info));
@ -293,10 +290,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
rs.getString("issnLinking"), info)); // Journal
ds.setResearchentitytypes(listValues(rs.getArray("researchentitytypes")));
ds.setProvidedproducttypes(listValues(rs.getArray("providedproducttypes")));
ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction")));
ds.setThematic(rs.getBoolean("thematic"));
ds.setKnowledgegraph(rs.getBoolean("knowledgegraph"));
ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies")));
ds.setSubmissionpolicyurl(rs.getString("submissionpolicyurl"));
ds.setPreservationpolicyurl(rs.getString("preservationpolicyurl"));
@ -434,11 +429,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
}
}
public List<Oaf> processDatasourceOrganization(final ResultSet rs) {
public List<Oaf> processServiceOrganization(final ResultSet rs) {
try {
final DataInfo info = prepareDataInfo(rs);
final String orgId = createOpenaireId(20, rs.getString("organization"), true);
final String dsId = createOpenaireId(10, rs.getString("datasource"), true);
final String dsId = createOpenaireId(10, rs.getString("service"), true);
final List<KeyValue> collectedFrom = listKeyValues(
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));

View File

@ -30,6 +30,11 @@
<value></value>
<description>a blacklist of nsprefixes (comma separeted)</description>
</property>
<property>
<name>reuseContent</name>
<value>false</value>
<description>reuse content in the aggregator database</description>
</property>
<property>
<name>sparkDriverMemory</name>
<description>memory for driver process</description>
@ -85,12 +90,20 @@
</configuration>
</global>
<start to="ImportDB"/>
<start to="reuse_db"/>
<kill name="Kill">
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
</kill>
<decision name="reuse_db">
<switch>
<case to="ImportDB">${wf:conf('reuseContent') eq false}</case>
<case to="GenerateEntities">${wf:conf('reuseContent') eq true}</case>
<default to="ImportDB"/>
</switch>
</decision>
<action name="ImportDB">
<java>
<prepare>
@ -125,6 +138,55 @@
<arg>--action</arg><arg>claims</arg>
<arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
</java>
<ok to="GenerateEntities"/>
<error to="Kill"/>
</action>
<action name="GenerateEntities">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>GenerateEntities</name>
<class>eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory ${sparkExecutorMemory}
--executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
</spark-opts>
<arg>--sourcePaths</arg><arg>${contentPath}/db_records,${contentPath}/db_claims</arg>
<arg>--targetPath</arg><arg>${workingDir}/entities</arg>
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
<arg>--shouldHashId</arg><arg>true</arg>
</spark>
<ok to="GenerateGraph"/>
<error to="Kill"/>
</action>
<action name="GenerateGraph">
<spark xmlns="uri:oozie:spark-action:0.2">
<master>yarn</master>
<mode>cluster</mode>
<name>GenerateGraph</name>
<class>eu.dnetlib.dhp.oa.graph.raw.DispatchEntitiesApplication</class>
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
<spark-opts>
--executor-memory ${sparkExecutorMemory}
--executor-cores ${sparkExecutorCores}
--driver-memory=${sparkDriverMemory}
--conf spark.extraListeners=${spark2ExtraListeners}
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
--conf spark.sql.shuffle.partitions=7680
</spark-opts>
<arg>--sourcePath</arg><arg>${workingDir}/entities</arg>
<arg>--graphRawPath</arg><arg>${workingDir}/graph_aggregator</arg>
</spark>
<ok to="End"/>
<error to="Kill"/>
</action>

View File

@ -109,7 +109,6 @@ SELECT
d.lastconsenttermsofusedate AS lastconsenttermsofusedate,
d.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction,
d.thematic AS thematic,
-- REMOVED ???: d.knowledge_graph AS knowledgegraph,
array(select unnest(d.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies,
nullif(trim(d.submission_policy_url), '') AS submissionpolicyurl,
nullif(trim(d.preservation_policy_url), '') AS preservationpolicyurl,

View File

@ -129,9 +129,6 @@ public class MigrateDbEntitiesApplicationTest {
getValueAsList("odlanguages", fields),
ds.getOdlanguages().stream().map(Field::getValue).collect(Collectors.toList()));
assertEquals(getValueAsList("languages", fields), ds.getLanguages());
assertEquals(
getValueAsList("odcontenttypes", fields),
ds.getOdcontenttypes().stream().map(Field::getValue).collect(Collectors.toList()));
assertEquals(
getValueAsList("accessinfopackage", fields),
ds.getAccessinfopackage().stream().map(Field::getValue).collect(Collectors.toList()));
@ -155,13 +152,11 @@ public class MigrateDbEntitiesApplicationTest {
assertEquals(getValueAsString("certificates", fields), ds.getCertificates());
assertEquals(getValueAsList("researchentitytypes", fields), ds.getResearchentitytypes());
assertEquals(getValueAsList("providedproducttypes", fields), ds.getProvidedproducttypes());
assertEquals("National", ds.getJurisdiction().getClassid());
assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid());
assertTrue(ds.getThematic());
assertTrue(ds.getKnowledgegraph());
HashSet<String> cpSchemeId = ds
.getContentpolicies()
@ -246,7 +241,7 @@ public class MigrateDbEntitiesApplicationTest {
public void testProcessDatasourceOrganization() throws Exception {
final List<TypedField> fields = prepareMocks("datasourceorganization_resultset_entry.json");
final List<Oaf> list = app.processDatasourceOrganization(rs);
final List<Oaf> list = app.processServiceOrganization(rs);
assertEquals(2, list.size());
verifyMocks(fields);

View File

@ -140,13 +140,6 @@
"Swedish"
]
},
{
"field": "odcontenttypes",
"type": "array",
"value": [
"Journal articles"
]
},
{
"field": "accessinfopackage",
"type": "array",
@ -169,16 +162,6 @@
"type": "string",
"value": null
},
{
"field": "dataprovider",
"type": "boolean",
"value": null
},
{
"field": "serviceprovider",
"type": "boolean",
"value": null
},
{
"field": "databaseaccesstype",
"type": "string",

View File

@ -254,24 +254,6 @@
}
],
"languages" : [ "English", "German" ],
"odcontenttypes": [
{
"value": "Journal articles",
"dataInfo": {
"invisible": false,
"inferred": false,
"deletedbyinference": false,
"trust": "0.900",
"inferenceprovenance": null,
"provenanceaction": {
"classid": "sysimport:crosswalk:entityregistry",
"classname": "sysimport:crosswalk:entityregistry",
"schemeid": "dnet:provenanceActions",
"schemename": "dnet:provenanceActions"
}
}
}
],
"accessinfopackage": [
{
"value": "http://www.revista.vocesdelaeducacion.com.mx/index.php/index/oai",
@ -387,7 +369,6 @@
"schemename": "eosc:jurisdictions"
},
"thematic": true,
"knowledgegraph": true,
"contentpolicies": [
{
"classid": "Journal article",