forked from D-Net/dnet-hadoop
EOSC Services - removed fields from mapping, testing preparation
This commit is contained in:
parent a8c51f6f16
commit b6a7ff3a99
|
@ -148,10 +148,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
log.info("Processing Organizations...");
|
log.info("Processing Organizations...");
|
||||||
smdbe.execute("queryOrganizations.sql", smdbe::processOrganization, verifyNamespacePrefix);
|
smdbe.execute("queryOrganizations.sql", smdbe::processOrganization, verifyNamespacePrefix);
|
||||||
|
|
||||||
log.info("Processing relationsNoRemoval ds <-> orgs ...");
|
log.info("Processing relations services <-> orgs ...");
|
||||||
smdbe
|
smdbe
|
||||||
.execute(
|
.execute(
|
||||||
"queryDatasourceOrganization.sql", smdbe::processDatasourceOrganization,
|
"queryServiceOrganization.sql", smdbe::processServiceOrganization,
|
||||||
verifyNamespacePrefix);
|
verifyNamespacePrefix);
|
||||||
|
|
||||||
log.info("Processing projects <-> orgs ...");
|
log.info("Processing projects <-> orgs ...");
|
||||||
|
@ -268,13 +268,10 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
ds.setOdpolicies(field(rs.getString("odpolicies"), info));
|
ds.setOdpolicies(field(rs.getString("odpolicies"), info));
|
||||||
ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info));
|
ds.setOdlanguages(prepareListFields(rs.getArray("odlanguages"), info));
|
||||||
ds.setLanguages(listValues(rs.getArray("languages")));
|
ds.setLanguages(listValues(rs.getArray("languages")));
|
||||||
ds.setOdcontenttypes(prepareListFields(rs.getArray("odcontenttypes"), info));
|
|
||||||
ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info));
|
ds.setAccessinfopackage(prepareListFields(rs.getArray("accessinfopackage"), info));
|
||||||
ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info));
|
ds.setReleasestartdate(field(asString(rs.getDate("releasestartdate")), info));
|
||||||
ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info));
|
ds.setReleaseenddate(field(asString(rs.getDate("releaseenddate")), info));
|
||||||
ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info));
|
ds.setMissionstatementurl(field(rs.getString("missionstatementurl"), info));
|
||||||
ds.setDataprovider(field(rs.getBoolean("dataprovider"), info));
|
|
||||||
ds.setServiceprovider(field(rs.getBoolean("serviceprovider"), info));
|
|
||||||
ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info));
|
ds.setDatabaseaccesstype(field(rs.getString("databaseaccesstype"), info));
|
||||||
ds.setDatauploadtype(field(rs.getString("datauploadtype"), info));
|
ds.setDatauploadtype(field(rs.getString("datauploadtype"), info));
|
||||||
ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info));
|
ds.setDatabaseaccessrestriction(field(rs.getString("databaseaccessrestriction"), info));
|
||||||
|
@ -293,10 +290,8 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
rs.getString("issnLinking"), info)); // Journal
|
rs.getString("issnLinking"), info)); // Journal
|
||||||
|
|
||||||
ds.setResearchentitytypes(listValues(rs.getArray("researchentitytypes")));
|
ds.setResearchentitytypes(listValues(rs.getArray("researchentitytypes")));
|
||||||
ds.setProvidedproducttypes(listValues(rs.getArray("providedproducttypes")));
|
|
||||||
ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction")));
|
ds.setJurisdiction(prepareQualifierSplitting(rs.getString("jurisdiction")));
|
||||||
ds.setThematic(rs.getBoolean("thematic"));
|
ds.setThematic(rs.getBoolean("thematic"));
|
||||||
ds.setKnowledgegraph(rs.getBoolean("knowledgegraph"));
|
|
||||||
ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies")));
|
ds.setContentpolicies(prepareListOfQualifiers(rs.getArray("contentpolicies")));
|
||||||
ds.setSubmissionpolicyurl(rs.getString("submissionpolicyurl"));
|
ds.setSubmissionpolicyurl(rs.getString("submissionpolicyurl"));
|
||||||
ds.setPreservationpolicyurl(rs.getString("preservationpolicyurl"));
|
ds.setPreservationpolicyurl(rs.getString("preservationpolicyurl"));
|
||||||
|
@ -434,11 +429,11 @@ public class MigrateDbEntitiesApplication extends AbstractMigrationApplication i
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Oaf> processDatasourceOrganization(final ResultSet rs) {
|
public List<Oaf> processServiceOrganization(final ResultSet rs) {
|
||||||
try {
|
try {
|
||||||
final DataInfo info = prepareDataInfo(rs);
|
final DataInfo info = prepareDataInfo(rs);
|
||||||
final String orgId = createOpenaireId(20, rs.getString("organization"), true);
|
final String orgId = createOpenaireId(20, rs.getString("organization"), true);
|
||||||
final String dsId = createOpenaireId(10, rs.getString("datasource"), true);
|
final String dsId = createOpenaireId(10, rs.getString("service"), true);
|
||||||
final List<KeyValue> collectedFrom = listKeyValues(
|
final List<KeyValue> collectedFrom = listKeyValues(
|
||||||
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
|
createOpenaireId(10, rs.getString("collectedfromid"), true), rs.getString("collectedfromname"));
|
||||||
|
|
||||||
|
|
|
@ -30,6 +30,11 @@
|
||||||
<value></value>
|
<value></value>
|
||||||
<description>a blacklist of nsprefixes (comma separated)</description>
|
<description>a blacklist of nsprefixes (comma separated)</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>reuseContent</name>
|
||||||
|
<value>false</value>
|
||||||
|
<description>reuse content in the aggregator database</description>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>sparkDriverMemory</name>
|
<name>sparkDriverMemory</name>
|
||||||
<description>memory for driver process</description>
|
<description>memory for driver process</description>
|
||||||
|
@ -85,12 +90,20 @@
|
||||||
</configuration>
|
</configuration>
|
||||||
</global>
|
</global>
|
||||||
|
|
||||||
<start to="ImportDB"/>
|
<start to="reuse_db"/>
|
||||||
|
|
||||||
<kill name="Kill">
|
<kill name="Kill">
|
||||||
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
<message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
|
||||||
</kill>
|
</kill>
|
||||||
|
|
||||||
|
<decision name="reuse_db">
|
||||||
|
<switch>
|
||||||
|
<case to="ImportDB">${wf:conf('reuseContent') eq false}</case>
|
||||||
|
<case to="GenerateEntities">${wf:conf('reuseContent') eq true}</case>
|
||||||
|
<default to="ImportDB"/>
|
||||||
|
</switch>
|
||||||
|
</decision>
|
||||||
|
|
||||||
<action name="ImportDB">
|
<action name="ImportDB">
|
||||||
<java>
|
<java>
|
||||||
<prepare>
|
<prepare>
|
||||||
|
@ -125,6 +138,55 @@
|
||||||
<arg>--action</arg><arg>claims</arg>
|
<arg>--action</arg><arg>claims</arg>
|
||||||
<arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
|
<arg>--nsPrefixBlacklist</arg><arg>${nsPrefixBlacklist}</arg>
|
||||||
</java>
|
</java>
|
||||||
|
<ok to="GenerateEntities"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="GenerateEntities">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>GenerateEntities</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.raw.GenerateEntitiesApplication</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory ${sparkExecutorMemory}
|
||||||
|
--executor-cores ${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePaths</arg><arg>${contentPath}/db_records,${contentPath}/db_claims</arg>
|
||||||
|
<arg>--targetPath</arg><arg>${workingDir}/entities</arg>
|
||||||
|
<arg>--isLookupUrl</arg><arg>${isLookupUrl}</arg>
|
||||||
|
<arg>--shouldHashId</arg><arg>true</arg>
|
||||||
|
</spark>
|
||||||
|
<ok to="GenerateGraph"/>
|
||||||
|
<error to="Kill"/>
|
||||||
|
</action>
|
||||||
|
|
||||||
|
<action name="GenerateGraph">
|
||||||
|
<spark xmlns="uri:oozie:spark-action:0.2">
|
||||||
|
<master>yarn</master>
|
||||||
|
<mode>cluster</mode>
|
||||||
|
<name>GenerateGraph</name>
|
||||||
|
<class>eu.dnetlib.dhp.oa.graph.raw.DispatchEntitiesApplication</class>
|
||||||
|
<jar>dhp-graph-mapper-${projectVersion}.jar</jar>
|
||||||
|
<spark-opts>
|
||||||
|
--executor-memory ${sparkExecutorMemory}
|
||||||
|
--executor-cores ${sparkExecutorCores}
|
||||||
|
--driver-memory=${sparkDriverMemory}
|
||||||
|
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||||
|
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||||
|
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||||
|
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||||
|
--conf spark.sql.shuffle.partitions=7680
|
||||||
|
</spark-opts>
|
||||||
|
<arg>--sourcePath</arg><arg>${workingDir}/entities</arg>
|
||||||
|
<arg>--graphRawPath</arg><arg>${workingDir}/graph_aggregator</arg>
|
||||||
|
</spark>
|
||||||
<ok to="End"/>
|
<ok to="End"/>
|
||||||
<error to="Kill"/>
|
<error to="Kill"/>
|
||||||
</action>
|
</action>
|
||||||
|
|
|
@ -109,7 +109,6 @@ SELECT
|
||||||
d.lastconsenttermsofusedate AS lastconsenttermsofusedate,
|
d.lastconsenttermsofusedate AS lastconsenttermsofusedate,
|
||||||
d.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction,
|
d.jurisdiction||'@@@eosc:jurisdictions' AS jurisdiction,
|
||||||
d.thematic AS thematic,
|
d.thematic AS thematic,
|
||||||
-- REMOVED ???: d.knowledge_graph AS knowledgegraph,
|
|
||||||
array(select unnest(d.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies,
|
array(select unnest(d.content_policies)||'@@@eosc:contentpolicies') AS contentpolicies,
|
||||||
nullif(trim(d.submission_policy_url), '') AS submissionpolicyurl,
|
nullif(trim(d.submission_policy_url), '') AS submissionpolicyurl,
|
||||||
nullif(trim(d.preservation_policy_url), '') AS preservationpolicyurl,
|
nullif(trim(d.preservation_policy_url), '') AS preservationpolicyurl,
|
||||||
|
|
|
@ -129,9 +129,6 @@ public class MigrateDbEntitiesApplicationTest {
|
||||||
getValueAsList("odlanguages", fields),
|
getValueAsList("odlanguages", fields),
|
||||||
ds.getOdlanguages().stream().map(Field::getValue).collect(Collectors.toList()));
|
ds.getOdlanguages().stream().map(Field::getValue).collect(Collectors.toList()));
|
||||||
assertEquals(getValueAsList("languages", fields), ds.getLanguages());
|
assertEquals(getValueAsList("languages", fields), ds.getLanguages());
|
||||||
assertEquals(
|
|
||||||
getValueAsList("odcontenttypes", fields),
|
|
||||||
ds.getOdcontenttypes().stream().map(Field::getValue).collect(Collectors.toList()));
|
|
||||||
assertEquals(
|
assertEquals(
|
||||||
getValueAsList("accessinfopackage", fields),
|
getValueAsList("accessinfopackage", fields),
|
||||||
ds.getAccessinfopackage().stream().map(Field::getValue).collect(Collectors.toList()));
|
ds.getAccessinfopackage().stream().map(Field::getValue).collect(Collectors.toList()));
|
||||||
|
@ -155,13 +152,11 @@ public class MigrateDbEntitiesApplicationTest {
|
||||||
assertEquals(getValueAsString("certificates", fields), ds.getCertificates());
|
assertEquals(getValueAsString("certificates", fields), ds.getCertificates());
|
||||||
|
|
||||||
assertEquals(getValueAsList("researchentitytypes", fields), ds.getResearchentitytypes());
|
assertEquals(getValueAsList("researchentitytypes", fields), ds.getResearchentitytypes());
|
||||||
assertEquals(getValueAsList("providedproducttypes", fields), ds.getProvidedproducttypes());
|
|
||||||
|
|
||||||
assertEquals("National", ds.getJurisdiction().getClassid());
|
assertEquals("National", ds.getJurisdiction().getClassid());
|
||||||
assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid());
|
assertEquals("eosc:jurisdictions", ds.getJurisdiction().getSchemeid());
|
||||||
|
|
||||||
assertTrue(ds.getThematic());
|
assertTrue(ds.getThematic());
|
||||||
assertTrue(ds.getKnowledgegraph());
|
|
||||||
|
|
||||||
HashSet<String> cpSchemeId = ds
|
HashSet<String> cpSchemeId = ds
|
||||||
.getContentpolicies()
|
.getContentpolicies()
|
||||||
|
@ -246,7 +241,7 @@ public class MigrateDbEntitiesApplicationTest {
|
||||||
public void testProcessDatasourceOrganization() throws Exception {
|
public void testProcessDatasourceOrganization() throws Exception {
|
||||||
final List<TypedField> fields = prepareMocks("datasourceorganization_resultset_entry.json");
|
final List<TypedField> fields = prepareMocks("datasourceorganization_resultset_entry.json");
|
||||||
|
|
||||||
final List<Oaf> list = app.processDatasourceOrganization(rs);
|
final List<Oaf> list = app.processServiceOrganization(rs);
|
||||||
|
|
||||||
assertEquals(2, list.size());
|
assertEquals(2, list.size());
|
||||||
verifyMocks(fields);
|
verifyMocks(fields);
|
||||||
|
|
|
@ -140,13 +140,6 @@
|
||||||
"Swedish"
|
"Swedish"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"field": "odcontenttypes",
|
|
||||||
"type": "array",
|
|
||||||
"value": [
|
|
||||||
"Journal articles"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"field": "accessinfopackage",
|
"field": "accessinfopackage",
|
||||||
"type": "array",
|
"type": "array",
|
||||||
|
@ -169,16 +162,6 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"value": null
|
"value": null
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"field": "dataprovider",
|
|
||||||
"type": "boolean",
|
|
||||||
"value": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"field": "serviceprovider",
|
|
||||||
"type": "boolean",
|
|
||||||
"value": null
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"field": "databaseaccesstype",
|
"field": "databaseaccesstype",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
|
|
|
@ -254,24 +254,6 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"languages" : [ "English", "German" ],
|
"languages" : [ "English", "German" ],
|
||||||
"odcontenttypes": [
|
|
||||||
{
|
|
||||||
"value": "Journal articles",
|
|
||||||
"dataInfo": {
|
|
||||||
"invisible": false,
|
|
||||||
"inferred": false,
|
|
||||||
"deletedbyinference": false,
|
|
||||||
"trust": "0.900",
|
|
||||||
"inferenceprovenance": null,
|
|
||||||
"provenanceaction": {
|
|
||||||
"classid": "sysimport:crosswalk:entityregistry",
|
|
||||||
"classname": "sysimport:crosswalk:entityregistry",
|
|
||||||
"schemeid": "dnet:provenanceActions",
|
|
||||||
"schemename": "dnet:provenanceActions"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"accessinfopackage": [
|
"accessinfopackage": [
|
||||||
{
|
{
|
||||||
"value": "http://www.revista.vocesdelaeducacion.com.mx/index.php/index/oai",
|
"value": "http://www.revista.vocesdelaeducacion.com.mx/index.php/index/oai",
|
||||||
|
@ -387,7 +369,6 @@
|
||||||
"schemename": "eosc:jurisdictions"
|
"schemename": "eosc:jurisdictions"
|
||||||
},
|
},
|
||||||
"thematic": true,
|
"thematic": true,
|
||||||
"knowledgegraph": true,
|
|
||||||
"contentpolicies": [
|
"contentpolicies": [
|
||||||
{
|
{
|
||||||
"classid": "Journal article",
|
"classid": "Journal article",
|
||||||
|
|
Loading…
Reference in New Issue