[WebCrawlAffiliation]remove from the creation of the action set the relations for pmc and pmid. Only doi are allowed #462

Merged
claudio.atzori merged 1 commits from affiliationFromWebCrawlOnlyDOI into beta 2024-07-17 11:12:33 +02:00
2 changed files with 11 additions and 7 deletions

View File

@ -42,6 +42,9 @@ public class Constants {
public static final String NULL = "NULL"; public static final String NULL = "NULL";
public static final String NA = "N/A"; public static final String NA = "N/A";
public static final String WEB_CRAWL_ID = "10|openaire____::fb98a192f6a055ba495ef414c330834b";
public static final String WEB_CRAWL_NAME = "Web Crawl";
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private Constants() { private Constants() {

View File

@ -21,6 +21,7 @@ import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectMapper;
import eu.dnetlib.dhp.actionmanager.Constants;
import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.application.ArgumentApplicationParser;
import eu.dnetlib.dhp.schema.action.AtomicAction; import eu.dnetlib.dhp.schema.action.AtomicAction;
import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelConstants;
@ -29,6 +30,7 @@ import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory;
import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils;
import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner; import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner;
import eu.dnetlib.dhp.schema.oaf.utils.PidType; import eu.dnetlib.dhp.schema.oaf.utils.PidType;
import io.netty.util.Constant;
import scala.Tuple2; import scala.Tuple2;
/** /**
@ -44,8 +46,7 @@ public class CreateActionSetFromWebEntries implements Serializable {
private static final String PMID_PREFIX = "50|pmid________::"; private static final String PMID_PREFIX = "50|pmid________::";
private static final String PMCID_PREFIX = "50|pmc_________::"; private static final String PMCID_PREFIX = "50|pmc_________::";
private static final String WEB_CRAWL_ID = "10|openaire____::fb98a192f6a055ba495ef414c330834b";
private static final String WEB_CRAWL_NAME = "Web Crawl";
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
@ -104,8 +105,7 @@ public class CreateActionSetFromWebEntries implements Serializable {
final String ror = ROR_PREFIX final String ror = ROR_PREFIX
+ IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", row.getAs("ror"))); + IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", row.getAs("ror")));
ret.addAll(createAffiliationRelationPairDOI(row.getAs("doi"), ror)); ret.addAll(createAffiliationRelationPairDOI(row.getAs("doi"), ror));
ret.addAll(createAffiliationRelationPairPMID(row.getAs("pmid"), ror));
ret.addAll(createAffiliationRelationPairPMCID(row.getAs("pmcid"), ror));
return ret return ret
.iterator(); .iterator();
@ -139,8 +139,9 @@ public class CreateActionSetFromWebEntries implements Serializable {
"institution", functions "institution", functions
.explode( .explode(
functions.col("institutions"))) functions.col("institutions")))
.selectExpr( .selectExpr(
"id", "doi", "ids.pmcid as pmcid", "ids.pmid as pmid", "institution.ror as ror", "id", "doi", "institution.ror as ror",
"institution.country_code as country_code", "publication_year") "institution.country_code as country_code", "publication_year")
.distinct(); .distinct();
@ -214,7 +215,7 @@ public class CreateActionSetFromWebEntries implements Serializable {
ModelConstants.IS_AUTHOR_INSTITUTION_OF, ModelConstants.IS_AUTHOR_INSTITUTION_OF,
Arrays Arrays
.asList( .asList(
OafMapperUtils.keyValue(WEB_CRAWL_ID, WEB_CRAWL_NAME)), OafMapperUtils.keyValue(Constants.WEB_CRAWL_ID, Constants.WEB_CRAWL_NAME)),
OafMapperUtils OafMapperUtils
.dataInfo( .dataInfo(
false, null, false, false, false, null, false, false,
@ -233,7 +234,7 @@ public class CreateActionSetFromWebEntries implements Serializable {
ModelConstants.HAS_AUTHOR_INSTITUTION, ModelConstants.HAS_AUTHOR_INSTITUTION,
Arrays Arrays
.asList( .asList(
OafMapperUtils.keyValue(WEB_CRAWL_ID, WEB_CRAWL_NAME)), OafMapperUtils.keyValue(Constants.WEB_CRAWL_ID, Constants.WEB_CRAWL_NAME)),
OafMapperUtils OafMapperUtils
.dataInfo( .dataInfo(
false, null, false, false, false, null, false, false,