From 81bfe3fe32c9a8926b0fed686a781627f6fe5ab2 Mon Sep 17 00:00:00 2001 From: Claudio Atzori Date: Thu, 26 Sep 2024 09:23:44 +0200 Subject: [PATCH] WIP merged beta into main --- .gitignore | 1 + .../WritePredefinedProjectPropertiesTest.java | 10 +- .../eu/dnetlib/dhp/common/PacePerson.java | 2 +- .../dhp/common/collection/HttpConnector2.java | 2 - .../dhp/schema/oaf/utils/MergeUtils.java | 4 + .../scholexplorer/relation/relations.json | 8 + .../application/SparkScalaApplication.scala | 7 +- .../dhp/sx/graph/scholix/ScholixUtils.scala | 19 +- .../schema/oaf/utils/OafMapperUtilsTest.java | 2 +- dhp-pace-core/pom.xml | 100 +- ...ustering.java => LegalnameClustering.java} | 31 +- .../pace/common/AbstractPaceFunctions.java | 94 + .../java/eu/dnetlib/pace/model/FieldDef.java | 20 + .../eu/dnetlib/pace/model/SparkModel.scala | 26 +- .../java/eu/dnetlib/pace/tree/CityMatch.java | 48 - .../java/eu/dnetlib/pace/tree/CodeMatch.java | 51 + .../eu/dnetlib/pace/tree/CountryMatch.java | 6 + .../pace/tree/JaroWinklerLegalname.java | 59 + .../pace/tree/JaroWinklerNormalizedName.java | 74 - .../eu/dnetlib/pace/tree/KeywordMatch.java | 50 - .../pace/tree/support/TreeNodeDef.java | 2 +- .../pace/tree/support/TreeNodeStats.java | 33 +- .../pace/tree/support/TreeProcessor.java | 8 +- .../eu/dnetlib/pace/config/country_map.csv | 239 ++ .../dnetlib/pace/util/SparkCompatUtils.scala | 12 + .../dnetlib/pace/util/SparkCompatUtils.scala | 12 + .../clustering/ClusteringFunctionTest.java | 38 +- .../dnetlib/pace/common/PaceFunctionTest.java | 43 + .../pace/comparators/ComparatorTest.java | 70 +- .../java/eu/dnetlib/pace/util/UtilTest.java | 1 + dhp-shade-package/pom.xml | 169 ++ dhp-workflows/dhp-actionmanager/pom.xml | 43 - .../dnetlib/dhp/actionmanager/ISClient.java | 27 +- .../PrepareAffiliationRelations.java | 1 - .../bipfinder/SparkAtomicActionScoreJob.java | 3 +- .../PrepareSDGSparkJob.java | 102 +- .../fosnodoi/CreateActionSetSparkJob.java | 7 - .../personentity/CoAuthorshipIterator.java | 80 + .../actionmanager/personentity/Coauthors.java | 20 + .../actionmanager/personentity/Couples.java | 40 + .../personentity/ExtractPerson.java | 437 ++++ .../actionmanager/personentity/WorkList.java | 25 + .../sdgnodoi/CreateActionSetSparkJob.java | 91 + .../CreateActionSetFromWebEntries.java | 40 +- .../webcrawl/RemoveRelationFromActionSet.java | 158 ++ .../dhp/collection/CollectorWorker.java | 62 +- .../dhp/collection/orcid/model/Author.java | 3 + .../dhp/collection/orcid/model/ORCIDItem.java | 3 + .../dhp/collection/orcid/model/Work.java | 2 + .../collection/plugin/CollectorPlugin.java | 2 +- .../gtr2/Gtr2PublicationsCollectorPlugin.java | 43 + .../plugin/gtr2/Gtr2PublicationsIterator.java | 215 ++ .../osf/OsfPreprintsCollectorPlugin.java | 52 + .../plugin/osf/OsfPreprintsIterator.java | 151 ++ .../researchfi/ResearchFiCollectorPlugin.java | 76 + .../plugin/researchfi/ResearchFiIterator.java | 117 + .../collection/plugin/utils/XMLIterator.java | 45 +- .../input_actionset_parameter.json | 8 +- .../opencitations/remap_parameters.json | 13 +- .../personentity/as_parameters.json | 25 + .../actionmanager/personentity/job.properties | 2 + .../personentity/oozie_app/config-default.xml | 30 + .../personentity/oozie_app/workflow.xml | 111 + .../actionmanager/sdgnodoi/as_parameters.json | 20 + .../sdgnodoi/oozie_app/config-default.xml | 30 + .../sdgnodoi/oozie_app/workflow.xml | 125 + .../dhp/actionmanager/webcrawl/job.properties | 14 +- .../webcrawl/oozie_app/workflow.xml | 34 +- .../collection/crossref/Crossref2Oaf.scala | 13 +- .../ebi/SparkCreateBaselineDataFrame.scala | 36 +- .../dnetlib/dhp/sx/bio/pubmed/PMParser.scala | 3 +- .../createunresolvedentities/ProduceTest.java | 5 +- .../opencitations/ReadCOCITest.java | 4 +- .../opencitations/RemapTest.java | 8 +- .../actionmanager/person/CreatePersonAS.java | 213 ++ .../actionmanager/webcrawl/CreateASTest.java | 23 +- .../webcrawl/RemoveFromASTest.java | 108 + .../plugin/file/FileGZipMultipleNodeTest.java | 64 + .../gtr2/Gtr2PublicationsIteratorTest.java | 103 + .../osf/OsfPreprintsCollectorPluginTest.java | 122 + .../ResearchFiCollectorPluginTest.java | 58 + .../plugin/rest/OsfPreprintCollectorTest.java | 105 - .../actionmanager/person/WorkJson/part-00000 | 10 + .../webcrawl/blackListRemove/not_irish.json | 1 + .../dhp/collection/crossref/issn_pub.json | 4 - .../dhp/collection/plugin/file/dblp.gz | Bin 0 -> 1097 bytes .../crossref/CrossrefMappingTest.scala | 13 +- .../dhp/collection/mag/MAGMappingTest.scala | 2 +- .../dnetlib/dhp/sx/bio/BioScholixTest.scala | 20 +- dhp-workflows/dhp-dedup-openaire/pom.xml | 24 - .../dhp/oa/dedup/SparkCreateMergeRels.java | 4 +- .../dhp/oa/dedup/SparkPropagateRelation.java | 4 +- .../dhp/oa/dedup/DecisionTreeTest.java | 85 + .../dnetlib/dhp/oa/dedup/SparkDedupTest.java | 14 +- .../dhp/oa/dedup/SparkOpenorgsDedupTest.java | 9 +- .../oa/dedup/SparkPublicationRootsTest2.java | 4 +- .../dhp/oa/dedup/jpath/JsonPathTest.java | 26 + .../dnetlib/dhp/dedup/conf/org.curr.conf.json | 187 +- .../dnetlib/dhp/dedup/json/dataset_merge.json | 2 +- .../oa/dedup/jpath/organization_example1.json | 1 + .../dnetlib/dhp/bulktag/SparkBulkTagJob.java | 45 +- .../bulktag/community/TaggingConstants.java | 6 + .../dnetlib/dhp/bulktag/BulkTagJobTest.java | 132 ++ .../update_datasource/organization | 4 + .../publication/update_datasource/project | 4 + .../oa/graph/raw_all/oozie_app/workflow.xml | 30 +- .../graph/sql/queryOpenOrgsForOrgsDedup.sql | 12 +- .../dhp/sx/create_scholix_dump_params.json | 5 + .../eu/dnetlib/dhp/sx/relation/relations.json | 166 ++ .../dhp/sx/graph/ScholexplorerUtils.scala | 258 +++ .../graph/SparkCreateScholexplorerDump.scala | 141 ++ .../graph/scholix/ScholixGenerationTest.scala | 26 + .../dhp/oa/provision/PayloadConverterJob.java | 9 + .../dhp/oa/provision/model/JoinedEntity.java | 6 +- .../model/ProvisionModelSupport.java | 27 +- .../oa/provision/utils/XmlRecordFactory.java | 19 +- .../oa/provision/XmlRecordFactoryTest.java | 41 +- .../dnetlib/dhp/oa/provision/project_aka.json | 1 + dhp-workflows/dhp-stats-actionsets/pom.xml | 4 +- dhp-workflows/dhp-stats-hist-snaps/pom.xml | 4 +- dhp-workflows/dhp-stats-monitor-irish/pom.xml | 4 +- .../dhp-stats-monitor-update/pom.xml | 4 +- dhp-workflows/dhp-swh/pom.xml | 43 - .../dnetlib/dhp/swh/PrepareSWHActionsets.java | 3 +- .../dhp-usage-raw-data-update/pom.xml | 12 +- dhp-workflows/dhp-usage-stats-build/pom.xml | 18 +- pom.xml | 2048 +++++++++-------- 127 files changed, 5983 insertions(+), 1862 deletions(-) rename dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/{KeywordsClustering.java => LegalnameClustering.java} (54%) delete mode 100644 dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CityMatch.java create mode 100644 dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CodeMatch.java create mode 100644 dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerLegalname.java delete mode 100644 dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java delete mode 100644 dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/KeywordMatch.java create mode 100644 dhp-pace-core/src/main/resources/eu/dnetlib/pace/config/country_map.csv create mode 100644 dhp-pace-core/src/main/spark-2/eu/dnetlib/pace/util/SparkCompatUtils.scala create mode 100644 dhp-pace-core/src/main/spark-35/eu/dnetlib/pace/util/SparkCompatUtils.scala create mode 100644 dhp-shade-package/pom.xml create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/CoAuthorshipIterator.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/Coauthors.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/Couples.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/WorkList.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/sdgnodoi/CreateActionSetSparkJob.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/webcrawl/RemoveRelationFromActionSet.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsCollectorPlugin.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIterator.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPlugin.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsIterator.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiCollectorPlugin.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiIterator.java create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/personentity/as_parameters.json create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/personentity/job.properties create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/personentity/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/personentity/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/sdgnodoi/as_parameters.json create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/sdgnodoi/oozie_app/config-default.xml create mode 100644 dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/sdgnodoi/oozie_app/workflow.xml create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/person/CreatePersonAS.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/webcrawl/RemoveFromASTest.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/file/FileGZipMultipleNodeTest.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIteratorTest.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPluginTest.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiCollectorPluginTest.java delete mode 100644 dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/OsfPreprintCollectorTest.java create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/person/WorkJson/part-00000 create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/blackListRemove/not_irish.json create mode 100644 dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/plugin/file/dblp.gz create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DecisionTreeTest.java create mode 100644 dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/oa/dedup/jpath/organization_example1.json create mode 100644 dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/organization create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/create_scholix_dump_params.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/relation/relations.json create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/ScholexplorerUtils.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholexplorerDump.scala create mode 100644 dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGenerationTest.scala create mode 100644 dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/project_aka.json diff --git a/.gitignore b/.gitignore index 14cd4d345..6fafc7055 100644 --- a/.gitignore +++ b/.gitignore @@ -27,3 +27,4 @@ spark-warehouse /**/.factorypath /**/.scalafmt.conf /.java-version +/dhp-shade-package/dependency-reduced-pom.xml diff --git a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java index 84b962b4b..eddcd8867 100644 --- a/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java +++ b/dhp-build/dhp-build-properties-maven-plugin/src/test/java/eu/dnetlib/maven/plugin/properties/WritePredefinedProjectPropertiesTest.java @@ -80,7 +80,15 @@ class WritePredefinedProjectPropertiesTest { mojo.outputFile = testFolder; // execute - Assertions.assertThrows(MojoExecutionException.class, () -> mojo.execute()); + try { + mojo.execute(); + Assertions.assertTrue(false); // not reached + } catch (Exception e) { + Assertions + .assertTrue( + MojoExecutionException.class.isAssignableFrom(e.getClass()) || + IllegalArgumentException.class.isAssignableFrom(e.getClass())); + } } @Test diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java index fac9a7565..fbf586f8c 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/PacePerson.java @@ -38,7 +38,7 @@ public class PacePerson { PacePerson.class .getResourceAsStream( "/eu/dnetlib/dhp/common/name_particles.txt"))); - } catch (IOException e) { + } catch (Exception e) { throw new RuntimeException(e); } } diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java index 76a7046fb..d2e53f11a 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/common/collection/HttpConnector2.java @@ -12,9 +12,7 @@ import java.util.concurrent.TimeUnit; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.math.NumberUtils; -import org.apache.commons.lang3.time.DateUtils; import org.apache.http.HttpHeaders; -import org.joda.time.Instant; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java index f8d878cf6..a660ab9b1 100644 --- a/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java +++ b/dhp-common/src/main/java/eu/dnetlib/dhp/schema/oaf/utils/MergeUtils.java @@ -468,6 +468,10 @@ public class MergeUtils { merge.setIsInDiamondJournal(booleanOR(merge.getIsInDiamondJournal(), enrich.getIsInDiamondJournal())); merge.setPubliclyFunded(booleanOR(merge.getPubliclyFunded(), enrich.getPubliclyFunded())); + if (StringUtils.isBlank(merge.getTransformativeAgreement())) { + merge.setTransformativeAgreement(enrich.getTransformativeAgreement()); + } + return merge; } diff --git a/dhp-common/src/main/resources/eu/dnetlib/scholexplorer/relation/relations.json b/dhp-common/src/main/resources/eu/dnetlib/scholexplorer/relation/relations.json index 98e8daa18..4f0cee53d 100644 --- a/dhp-common/src/main/resources/eu/dnetlib/scholexplorer/relation/relations.json +++ b/dhp-common/src/main/resources/eu/dnetlib/scholexplorer/relation/relations.json @@ -154,5 +154,13 @@ "unknown":{ "original":"Unknown", "inverse":"Unknown" + }, + "isamongtopnsimilardocuments": { + "original": "IsAmongTopNSimilarDocuments", + "inverse": "HasAmongTopNSimilarDocuments" + }, + "hasamongtopnsimilardocuments": { + "original": "HasAmongTopNSimilarDocuments", + "inverse": "IsAmongTopNSimilarDocuments" } } \ No newline at end of file diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala index a14c25837..526bbd295 100644 --- a/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala +++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/application/SparkScalaApplication.scala @@ -65,12 +65,13 @@ abstract class AbstractScalaApplication( val conf: SparkConf = new SparkConf() val master = parser.get("master") log.info(s"Creating Spark session: Master: $master") - SparkSession + val b = SparkSession .builder() .config(conf) .appName(getClass.getSimpleName) - .master(master) - .getOrCreate() + if (master != null) + b.master(master) + b.getOrCreate() } def reportTotalSize(targetPath: String, outputBasePath: String): Unit = { diff --git a/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala b/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala index a995016a8..72a17777e 100644 --- a/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala +++ b/dhp-common/src/main/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixUtils.scala @@ -65,7 +65,11 @@ object ScholixUtils extends Serializable { } def generateScholixResourceFromResult(r: Result): ScholixResource = { - generateScholixResourceFromSummary(ScholixUtils.resultToSummary(r)) + val sum = ScholixUtils.resultToSummary(r) + if (sum != null) + generateScholixResourceFromSummary(ScholixUtils.resultToSummary(r)) + else + null } val statsAggregator: Aggregator[(String, String, Long), RelatedEntities, RelatedEntities] = @@ -153,6 +157,14 @@ object ScholixUtils extends Serializable { } + def invRel(rel: String): String = { + val semanticRelation = relations.getOrElse(rel.toLowerCase, null) + if (semanticRelation != null) + semanticRelation.inverse + else + null + } + def extractCollectedFrom(summary: ScholixResource): List[ScholixEntityId] = { if (summary.getCollectedFrom != null && !summary.getCollectedFrom.isEmpty) { val l: List[ScholixEntityId] = summary.getCollectedFrom.asScala.map { d => @@ -377,10 +389,7 @@ object ScholixUtils extends Serializable { if (persistentIdentifiers.isEmpty) return null s.setLocalIdentifier(persistentIdentifiers.asJava) - if (r.isInstanceOf[Publication]) - s.setTypology(Typology.publication) - else - s.setTypology(Typology.dataset) +// s.setTypology(r.getResulttype.getClassid) s.setSubType(r.getInstance().get(0).getInstancetype.getClassname) diff --git a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java index 9317c0ce4..ae7fdcc62 100644 --- a/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java +++ b/dhp-common/src/test/java/eu/dnetlib/dhp/schema/oaf/utils/OafMapperUtilsTest.java @@ -177,7 +177,7 @@ class OafMapperUtilsTest { assertTrue(cfId(d1.getCollectedfrom()).contains(ModelConstants.CROSSREF_ID)); assertEquals( - ModelConstants.DATASET_RESULTTYPE_CLASSID, + ModelConstants.PUBLICATION_RESULTTYPE_CLASSID, ((Result) MergeUtils .merge(p2, d1)) .getResulttype() diff --git a/dhp-pace-core/pom.xml b/dhp-pace-core/pom.xml index 6c706b692..52ec2a253 100644 --- a/dhp-pace-core/pom.xml +++ b/dhp-pace-core/pom.xml @@ -24,7 +24,7 @@ scala-compile-first - initialize + process-resources add-source compile @@ -59,14 +59,6 @@ edu.cmu secondstring - - com.google.guava - guava - - - com.google.code.gson - gson - org.apache.commons commons-lang3 @@ -91,10 +83,6 @@ com.fasterxml.jackson.core jackson-databind - - org.apache.commons - commons-math3 - com.jayway.jsonpath json-path @@ -113,4 +101,90 @@ + + + spark-24 + + true + + + + + + org.codehaus.mojo + build-helper-maven-plugin + 3.4.0 + + + generate-sources + + add-source + + + + src/main/spark-2 + + + + + + + + + + + spark-34 + + + + + org.codehaus.mojo + build-helper-maven-plugin + 3.4.0 + + + generate-sources + + add-source + + + + src/main/spark-2 + + + + + + + + + + + spark-35 + + + + + org.codehaus.mojo + build-helper-maven-plugin + 3.4.0 + + + generate-sources + + add-source + + + + src/main/spark-35 + + + + + + + + + + diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LegalnameClustering.java similarity index 54% rename from dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java rename to dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LegalnameClustering.java index fdd8d1fb1..8a76a4bc3 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/KeywordsClustering.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/clustering/LegalnameClustering.java @@ -2,31 +2,41 @@ package eu.dnetlib.pace.clustering; import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; import org.apache.commons.lang3.StringUtils; import eu.dnetlib.pace.config.Config; -@ClusteringClass("keywordsclustering") -public class KeywordsClustering extends AbstractClusteringFunction { +@ClusteringClass("legalnameclustering") +public class LegalnameClustering extends AbstractClusteringFunction { - public KeywordsClustering(Map params) { + private static final Pattern CITY_CODE_PATTERN = Pattern.compile("city::\\d+"); + private static final Pattern KEYWORD_CODE_PATTERN = Pattern.compile("key::\\d+"); + + public LegalnameClustering(Map params) { super(params); } + public Set getRegexList(String input, Pattern codeRegex) { + Matcher matcher = codeRegex.matcher(input); + Set cities = new HashSet<>(); + while (matcher.find()) { + cities.add(matcher.group()); + } + return cities; + } + @Override protected Collection doApply(final Config conf, String s) { - // takes city codes and keywords codes without duplicates - Set keywords = getKeywords(s, conf.translationMap(), paramOrDefault("windowSize", 4)); - Set cities = getCities(s, paramOrDefault("windowSize", 4)); - // list of combination to return as result final Collection combinations = new LinkedHashSet(); - for (String keyword : keywordsToCodes(keywords, conf.translationMap())) { - for (String city : citiesToCodes(cities)) { + for (String keyword : getRegexList(s, KEYWORD_CODE_PATTERN)) { + for (String city : getRegexList(s, CITY_CODE_PATTERN)) { combinations.add(keyword + "-" + city); if (combinations.size() >= paramOrDefault("max", 2)) { return combinations; @@ -42,9 +52,6 @@ public class KeywordsClustering extends AbstractClusteringFunction { return fields .stream() .filter(f -> !f.isEmpty()) - .map(KeywordsClustering::cleanup) - .map(KeywordsClustering::normalize) - .map(s -> filterAllStopWords(s)) .map(s -> doApply(conf, s)) .map(c -> filterBlacklisted(c, ngramBlacklist)) .flatMap(c -> c.stream()) diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java index b055077d8..6ef550c50 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java @@ -27,6 +27,14 @@ public class AbstractPaceFunctions extends PaceCommonUtils { private static Map cityMap = AbstractPaceFunctions .loadMapFromClasspath("/eu/dnetlib/pace/config/city_map.csv"); + // keywords map to be used when translating the keyword names into codes + private static Map keywordMap = AbstractPaceFunctions + .loadMapFromClasspath("/eu/dnetlib/pace/config/translation_map.csv"); + + // country map to be used when inferring the country from the city name + private static Map countryMap = AbstractPaceFunctions + .loadCountryMapFromClasspath("/eu/dnetlib/pace/config/country_map.csv"); + // list of stopwords in different languages protected static Set stopwords_gr = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_gr.txt"); protected static Set stopwords_en = loadFromClasspath("/eu/dnetlib/pace/config/stopwords_en.txt"); @@ -74,6 +82,64 @@ public class AbstractPaceFunctions extends PaceCommonUtils { return s12; } + public static String countryInference(final String original, String inferFrom) { + if (!original.equalsIgnoreCase("unknown")) + return original; + + inferFrom = cleanup(inferFrom); + inferFrom = normalize(inferFrom); + inferFrom = filterAllStopWords(inferFrom); + Set cities = getCities(inferFrom, 4); + return citiesToCountry(cities).stream().findFirst().orElse("UNKNOWN"); + } + + public static String cityInference(String original) { + original = cleanup(original); + original = normalize(original); + original = filterAllStopWords(original); + + Set cities = getCities(original, 4); + + for (String city : cities) { + original = original.replaceAll(city, cityMap.get(city)); + } + + return original; + } + + public static String keywordInference(String original) { + original = cleanup(original); + original = normalize(original); + original = filterAllStopWords(original); + + Set keywords = getKeywords(original, keywordMap, 4); + + for (String keyword : keywords) { + original = original.replaceAll(keyword, keywordMap.get(keyword)); + } + + return original; + } + + public static String cityKeywordInference(String original) { + original = cleanup(original); + original = normalize(original); + original = filterAllStopWords(original); + + Set keywords = getKeywords(original, keywordMap, 4); + Set cities = getCities(original, 4); + + for (String keyword : keywords) { + original = original.replaceAll(keyword, keywordMap.get(keyword)); + } + + for (String city : cities) { + original = original.replaceAll(city, cityMap.get(city)); + } + + return original; + } + protected static String fixXML(final String a) { return a @@ -208,6 +274,30 @@ public class AbstractPaceFunctions extends PaceCommonUtils { return m; } + public static Map loadCountryMapFromClasspath(final String classpath) { + + Transliterator transliterator = Transliterator.getInstance("Any-Eng"); + + final Map m = new HashMap<>(); + try { + for (final String s : IOUtils + .readLines(AbstractPaceFunctions.class.getResourceAsStream(classpath), StandardCharsets.UTF_8)) { + // string is like this: country_code;city1;city2;city3 + String[] line = s.split(";"); + String value = line[0]; + for (int i = 1; i < line.length; i++) { + String city = fixAliases(transliterator.transliterate(line[i].toLowerCase())); + String code = cityMap.get(city); + m.put(code, value); + } + } + } catch (final Throwable e) { + return new HashMap<>(); + } + return m; + + } + public static String removeKeywords(String s, Set keywords) { s = " " + s + " "; @@ -237,6 +327,10 @@ public class AbstractPaceFunctions extends PaceCommonUtils { return toCodes(keywords, cityMap); } + public static Set citiesToCountry(Set cities) { + return toCodes(toCodes(cities, cityMap), countryMap); + } + protected static String firstLC(final String s) { return StringUtils.substring(s, 0, 1).toLowerCase(); } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java index 7ad9b7445..b0dc11656 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/FieldDef.java @@ -47,9 +47,21 @@ public class FieldDef implements Serializable { private String clean; + private String infer; + + private String inferenceFrom; + public FieldDef() { } + public String getInferenceFrom() { + return inferenceFrom; + } + + public void setInferenceFrom(final String inferenceFrom) { + this.inferenceFrom = inferenceFrom; + } + public String getName() { return name; } @@ -126,6 +138,14 @@ public class FieldDef implements Serializable { this.clean = clean; } + public String getInfer() { + return infer; + } + + public void setInfer(String infer) { + this.infer = infer; + } + @Override public String toString() { try { diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala index aa04188da..c6db62339 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/model/SparkModel.scala @@ -3,7 +3,7 @@ package eu.dnetlib.pace.model import com.jayway.jsonpath.{Configuration, JsonPath} import eu.dnetlib.pace.common.AbstractPaceFunctions import eu.dnetlib.pace.config.{DedupConfig, Type} -import eu.dnetlib.pace.util.MapDocumentUtil +import eu.dnetlib.pace.util.{MapDocumentUtil, SparkCompatUtils} import org.apache.commons.lang3.StringUtils import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema @@ -52,7 +52,7 @@ case class SparkModel(conf: DedupConfig) { val orderingFieldPosition: Int = schema.fieldIndex(orderingFieldName) val parseJsonDataset: (Dataset[String] => Dataset[Row]) = df => { - df.map(r => rowFromJson(r))(RowEncoder(schema)) + df.map(r => rowFromJson(r))(SparkCompatUtils.encoderFor(schema)) } def rowFromJson(json: String): Row = { @@ -123,9 +123,19 @@ case class SparkModel(conf: DedupConfig) { case _ => res(index) } } + + if (StringUtils.isNotBlank(fdef.getInfer)) { + val inferFrom : String = if (StringUtils.isNotBlank(fdef.getInferenceFrom)) fdef.getInferenceFrom else fdef.getPath + res(index) = res(index) match { + case x: Seq[String] => x.map(inference(_, MapDocumentUtil.getJPathString(inferFrom, documentContext), fdef.getInfer)) + case _ => inference(res(index).toString, MapDocumentUtil.getJPathString(inferFrom, documentContext), fdef.getInfer) + } + } + } res + } new GenericRowWithSchema(values, schema) @@ -146,5 +156,17 @@ case class SparkModel(conf: DedupConfig) { res } + def inference(value: String, inferfrom: String, infertype: String) : String = { + val res = infertype match { + case "country" => AbstractPaceFunctions.countryInference(value, inferfrom) + case "city" => AbstractPaceFunctions.cityInference(value) + case "keyword" => AbstractPaceFunctions.keywordInference(value) + case "city_keyword" => AbstractPaceFunctions.cityKeywordInference(value) + case _ => value + } + + res + } + } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CityMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CityMatch.java deleted file mode 100644 index 1d898ad83..000000000 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CityMatch.java +++ /dev/null @@ -1,48 +0,0 @@ - -package eu.dnetlib.pace.tree; - -import java.util.Map; -import java.util.Set; - -import eu.dnetlib.pace.config.Config; -import eu.dnetlib.pace.tree.support.AbstractStringComparator; -import eu.dnetlib.pace.tree.support.ComparatorClass; - -@ComparatorClass("cityMatch") -public class CityMatch extends AbstractStringComparator { - - private Map params; - - public CityMatch(Map params) { - super(params); - this.params = params; - } - - @Override - public double distance(final String a, final String b, final Config conf) { - - String ca = cleanup(a); - String cb = cleanup(b); - - ca = normalize(ca); - cb = normalize(cb); - - ca = filterAllStopWords(ca); - cb = filterAllStopWords(cb); - - Set cities1 = getCities(ca, Integer.parseInt(params.getOrDefault("windowSize", "4"))); - Set cities2 = getCities(cb, Integer.parseInt(params.getOrDefault("windowSize", "4"))); - - Set codes1 = citiesToCodes(cities1); - Set codes2 = citiesToCodes(cities2); - - // if no cities are detected, the comparator gives 1.0 - if (codes1.isEmpty() && codes2.isEmpty()) - return 1.0; - else { - if (codes1.isEmpty() ^ codes2.isEmpty()) - return -1; // undefined if one of the two has no cities - return commonElementsPercentage(codes1, codes2); - } - } -} diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CodeMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CodeMatch.java new file mode 100644 index 000000000..25a12bcdf --- /dev/null +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CodeMatch.java @@ -0,0 +1,51 @@ + +package eu.dnetlib.pace.tree; + +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import eu.dnetlib.pace.config.Config; +import eu.dnetlib.pace.tree.support.AbstractStringComparator; +import eu.dnetlib.pace.tree.support.ComparatorClass; + +@ComparatorClass("codeMatch") +public class CodeMatch extends AbstractStringComparator { + + private Map params; + + private Pattern CODE_REGEX; + + public CodeMatch(Map params) { + super(params); + this.params = params; + this.CODE_REGEX = Pattern.compile(params.getOrDefault("codeRegex", "[a-zA-Z]::\\d+")); + } + + public Set getRegexList(String input) { + Matcher matcher = this.CODE_REGEX.matcher(input); + Set cities = new HashSet<>(); + while (matcher.find()) { + cities.add(matcher.group()); + } + return cities; + } + + @Override + public double distance(final String a, final String b, final Config conf) { + + Set codes1 = getRegexList(a); + Set codes2 = getRegexList(b); + + // if no codes are detected, the comparator gives 1.0 + if (codes1.isEmpty() && codes2.isEmpty()) + return 1.0; + else { + if (codes1.isEmpty() ^ codes2.isEmpty()) + return -1; // undefined if one of the two has no codes + return commonElementsPercentage(codes1, codes2); + } + } +} diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CountryMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CountryMatch.java index c02381983..96a87c455 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CountryMatch.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/CountryMatch.java @@ -2,6 +2,7 @@ package eu.dnetlib.pace.tree; import java.util.Map; +import java.util.Set; import com.wcohen.ss.AbstractStringDistance; @@ -12,8 +13,11 @@ import eu.dnetlib.pace.tree.support.ComparatorClass; @ComparatorClass("countryMatch") public class CountryMatch extends AbstractStringComparator { + private Map params; + public CountryMatch(Map params) { super(params, new com.wcohen.ss.JaroWinkler()); + this.params = params; } public CountryMatch(final double weight) { @@ -26,6 +30,7 @@ public class CountryMatch extends AbstractStringComparator { @Override public double distance(final String a, final String b, final Config conf) { + if (a.isEmpty() || b.isEmpty()) { return -1.0; // return -1 if a field is missing } @@ -45,4 +50,5 @@ public class CountryMatch extends AbstractStringComparator { protected double normalize(final double d) { return d; } + } diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerLegalname.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerLegalname.java new file mode 100644 index 000000000..e4a48c459 --- /dev/null +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerLegalname.java @@ -0,0 +1,59 @@ + +package eu.dnetlib.pace.tree; + +import java.util.Map; +import java.util.Set; + +import com.wcohen.ss.AbstractStringDistance; + +import eu.dnetlib.pace.config.Config; +import eu.dnetlib.pace.tree.support.AbstractStringComparator; +import eu.dnetlib.pace.tree.support.ComparatorClass; + +@ComparatorClass("jaroWinklerLegalname") +public class JaroWinklerLegalname extends AbstractStringComparator { + + private Map params; + + private final String CITY_CODE_REGEX = "city::\\d+"; + private final String KEYWORD_CODE_REGEX = "key::\\d+"; + + public JaroWinklerLegalname(Map params) { + super(params, new com.wcohen.ss.JaroWinkler()); + this.params = params; + } + + public JaroWinklerLegalname(double weight) { + super(weight, new com.wcohen.ss.JaroWinkler()); + } + + protected JaroWinklerLegalname(double weight, AbstractStringDistance ssalgo) { + super(weight, ssalgo); + } + + @Override + public double distance(String a, String b, final Config conf) { + + String ca = a.replaceAll(CITY_CODE_REGEX, "").replaceAll(KEYWORD_CODE_REGEX, " "); + String cb = b.replaceAll(CITY_CODE_REGEX, "").replaceAll(KEYWORD_CODE_REGEX, " "); + + ca = ca.replaceAll("[ ]{2,}", " "); + cb = cb.replaceAll("[ ]{2,}", " "); + + if (ca.isEmpty() && cb.isEmpty()) + return 1.0; + else + return normalize(ssalgo.score(ca, cb)); + } + + @Override + public double getWeight() { + return super.weight; + } + + @Override + protected double normalize(double d) { + return d; + } + +} diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java deleted file mode 100644 index 576b9281d..000000000 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/JaroWinklerNormalizedName.java +++ /dev/null @@ -1,74 +0,0 @@ - -package eu.dnetlib.pace.tree; - -import java.util.Map; -import java.util.Set; - -import com.wcohen.ss.AbstractStringDistance; - -import eu.dnetlib.pace.config.Config; -import eu.dnetlib.pace.tree.support.AbstractStringComparator; -import eu.dnetlib.pace.tree.support.ComparatorClass; - -@ComparatorClass("jaroWinklerNormalizedName") -public class JaroWinklerNormalizedName extends AbstractStringComparator { - - private Map params; - - public JaroWinklerNormalizedName(Map params) { - super(params, new com.wcohen.ss.JaroWinkler()); - this.params = params; - } - - public JaroWinklerNormalizedName(double weight) { - super(weight, new com.wcohen.ss.JaroWinkler()); - } - - protected JaroWinklerNormalizedName(double weight, AbstractStringDistance ssalgo) { - super(weight, ssalgo); - } - - @Override - public double distance(String a, String b, final Config conf) { - String ca = cleanup(a); - String cb = cleanup(b); - - ca = normalize(ca); - cb = normalize(cb); - - ca = filterAllStopWords(ca); - cb = filterAllStopWords(cb); - - Set keywords1 = getKeywords( - ca, conf.translationMap(), Integer.parseInt(params.getOrDefault("windowSize", "4"))); - Set keywords2 = getKeywords( - cb, conf.translationMap(), Integer.parseInt(params.getOrDefault("windowSize", "4"))); - - Set cities1 = getCities(ca, Integer.parseInt(params.getOrDefault("windowSize", "4"))); - Set cities2 = getCities(cb, Integer.parseInt(params.getOrDefault("windowSize", "4"))); - - ca = removeKeywords(ca, keywords1); - ca = removeKeywords(ca, cities1); - cb = removeKeywords(cb, keywords2); - cb = removeKeywords(cb, cities2); - - ca = ca.replaceAll("[ ]{2,}", " "); - cb = cb.replaceAll("[ ]{2,}", " "); - - if (ca.isEmpty() && cb.isEmpty()) - return 1.0; - else - return normalize(ssalgo.score(ca, cb)); - } - - @Override - public double getWeight() { - return super.weight; - } - - @Override - protected double normalize(double d) { - return d; - } - -} diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/KeywordMatch.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/KeywordMatch.java deleted file mode 100644 index 53acb4dc8..000000000 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/KeywordMatch.java +++ /dev/null @@ -1,50 +0,0 @@ - -package eu.dnetlib.pace.tree; - -import java.util.Map; -import java.util.Set; - -import eu.dnetlib.pace.config.Config; -import eu.dnetlib.pace.tree.support.AbstractStringComparator; -import eu.dnetlib.pace.tree.support.ComparatorClass; - -@ComparatorClass("keywordMatch") -public class KeywordMatch extends AbstractStringComparator { - - Map params; - - public KeywordMatch(Map params) { - super(params); - this.params = params; - } - - @Override - public double distance(final String a, final String b, final Config conf) { - - String ca = cleanup(a); - String cb = cleanup(b); - - ca = normalize(ca); - cb = normalize(cb); - - ca = filterAllStopWords(ca); - cb = filterAllStopWords(cb); - - Set keywords1 = getKeywords( - ca, conf.translationMap(), Integer.parseInt(params.getOrDefault("windowSize", "4"))); - Set keywords2 = getKeywords( - cb, conf.translationMap(), Integer.parseInt(params.getOrDefault("windowSize", "4"))); - - Set codes1 = toCodes(keywords1, conf.translationMap()); - Set codes2 = toCodes(keywords2, conf.translationMap()); - - // if no cities are detected, the comparator gives 1.0 - if (codes1.isEmpty() && codes2.isEmpty()) - return 1.0; - else { - if (codes1.isEmpty() ^ codes2.isEmpty()) - return -1.0; // undefined if one of the two has no keywords - return commonElementsPercentage(codes1, codes2); - } - } -} diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeDef.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeDef.java index 0973fdf1e..0ff03f5e1 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeDef.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeDef.java @@ -48,7 +48,7 @@ public class TreeNodeDef implements Serializable { // function for the evaluation of the node public TreeNodeStats evaluate(Row doc1, Row doc2, Config conf) { - TreeNodeStats stats = new TreeNodeStats(); + TreeNodeStats stats = new TreeNodeStats(ignoreUndefined); // for each field in the node, it computes the for (FieldConf fieldConf : fields) { diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeStats.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeStats.java index 2b96048b4..f6b210a8c 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeStats.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeNodeStats.java @@ -9,8 +9,11 @@ public class TreeNodeStats implements Serializable { private Map results; // this is an accumulator for the results of the node - public TreeNodeStats() { + private final boolean ignoreUndefined; + + public TreeNodeStats(boolean ignoreUndefined) { this.results = new HashMap<>(); + this.ignoreUndefined = ignoreUndefined; } public Map getResults() { @@ -22,7 +25,10 @@ public class TreeNodeStats implements Serializable { } public int fieldsCount() { - return this.results.size(); + if (ignoreUndefined) + return this.results.size(); + else + return this.results.size() - undefinedCount(); // do not count undefined } public int undefinedCount() { @@ -78,11 +84,22 @@ public class TreeNodeStats implements Serializable { double min = 100.0; // random high value for (FieldStats fs : this.results.values()) { if (fs.getResult() < min) { - if (fs.getResult() >= 0.0 || (fs.getResult() == -1 && fs.isCountIfUndefined())) + if (fs.getResult() == -1) { + if (fs.isCountIfUndefined()) { + min = 0.0; + } else { + min = -1; + } + } else { min = fs.getResult(); + } } } - return min; + if (ignoreUndefined) { + return min == -1.0 ? 0.0 : min; + } else { + return min; + } } // if at least one is true, return 1.0 @@ -91,7 +108,11 @@ public class TreeNodeStats implements Serializable { if (fieldStats.getResult() >= fieldStats.getThreshold()) return 1.0; } - return 0.0; + if (!ignoreUndefined && undefinedCount() > 0) { + return -1.0; + } else { + return 0.0; + } } // if at least one is false, return 0.0 @@ -100,7 +121,7 @@ public class TreeNodeStats implements Serializable { if (fieldStats.getResult() == -1) { if (fieldStats.isCountIfUndefined()) - return 0.0; + return ignoreUndefined ? 0.0 : -1.0; } else { if (fieldStats.getResult() < fieldStats.getThreshold()) return 0.0; diff --git a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java index 263504dbb..28b3a82af 100644 --- a/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java +++ b/dhp-pace-core/src/main/java/eu/dnetlib/pace/tree/support/TreeProcessor.java @@ -44,12 +44,10 @@ public class TreeProcessor { TreeNodeStats stats = currentNode.evaluate(doc1, doc2, config); treeStats.addNodeStats(nextNodeName, stats); - // if ignoreUndefined=false the miss is considered as undefined - if (!currentNode.isIgnoreUndefined() && stats.undefinedCount() > 0) { + double finalScore = stats.getFinalScore(currentNode.getAggregation()); + if (finalScore == -1.0) nextNodeName = currentNode.getUndefined(); - } - // if ignoreUndefined=true the miss is ignored and the score computed anyway - else if (stats.getFinalScore(currentNode.getAggregation()) >= currentNode.getThreshold()) { + else if (finalScore >= currentNode.getThreshold()) { nextNodeName = currentNode.getPositive(); } else { nextNodeName = currentNode.getNegative(); diff --git a/dhp-pace-core/src/main/resources/eu/dnetlib/pace/config/country_map.csv b/dhp-pace-core/src/main/resources/eu/dnetlib/pace/config/country_map.csv new file mode 100644 index 000000000..01e49979c --- /dev/null +++ b/dhp-pace-core/src/main/resources/eu/dnetlib/pace/config/country_map.csv @@ -0,0 +1,239 @@ +JP;Sapporo;Kobe;Okayama;Maebashi;Nagoya;Yamaguchi;Sendai;Nagano;Saga;Otsu;Niigata;Akita;Fukui;Matsue;Yamagata;Wakayama;Nagasaki;Kumamoto;Shizuoka;Osaka;Chiba;Hiroshima;Yawatahama-shi;Gifu;Takamatsu;Naha;Fukushima;Yokohama;Kagoshima;Kyoto;Aomori;Kanazawa;Miyazaki;Tsu;Tokyo;Toyama;Kochi;Tokushima;Hirado;Fukuoka;Mito;Japan;Utsunomiya;Nara;Kofu;Kozakai-cho;Saitama;Oita;Matsuyama;Morioka +ID;Tahuna;Amuntai;Karanganyar;Pangkalpinang;Makassar;Ranai;Sampit;Melonguane;Kotabumi;Jember;Banjar;Tanjungpandan;Tembagapura;Kisaran;Pangururan;Rangkasbitung;Bandar Lampung;Buntok;Tarutung;Makale;Saumlaki;Larantuka;Raba;Palangkaraya;Airmadidi;Ruteng;Indonesia;Padang;Sanana;Jambi;Bagan Si Api-api;Rantau Prapat;Watampone;Tambolaka;Bandung;Meulaboh;Cilacap;Serui;Denpasar;Manggar;Bengkalis;Piru;Parigi;Pekanbaru;Bengkulu;Tenggarong;Maumere;Majalengka;Soreang;Menggala;Palu;Sumenep;Sumber;Surabaya;Kandangan;Kupang;Betun;Kendari;Tanjungpinang;Mataram;Waingapu;Maba;Ambon;Ciamis;Rantepao;Atambua;Manado;Pelabuhanratu;Nabire;Banyuwangi;Ransiki;Bintuni;Lubuk Sikaping;Ende;Tanjung Selor;Cibinong;Kefamenanu;Wamena;Fakfak;Praya;Dompu;Tobelo;Sumbawa Besar;Raha;Biak;Gunungsitoli;Kuala Kapuas;Tuban;Bangkinang;Yogyakarta;Amahai;Pariaman;Pontianak;Merauke;Putussibau;Leramatang;Sofifi;Kolonodale;Singaraja;Cianjur;Jakarta;Jepara;Tomohon;Medan;Poso;Namlea;Kuningan;Taliwang;Pamekasan;Kuala Pembuang;Purwokerto;Bangkalan;Kaimana;Batang;Muara Teweh;Sorong;Ungaran;Barabai;Waikabubak;Prabumulih;Masamba;Samarinda;Semarang;Puruk Cahu;Pageralam;Barru;Rappang;Martapura;Sentani;Painan;Indramayu;Banjarmasin;Cimerak;Karawang;Tamiang Layang;Bungku;Pinrang;Jayapura;Bebandem;Manokwari;Majene;Ampana;Purwakarta;Palembang;Slateng Dua;Garut;Subang;Sungailiat;Lahat;Mamuju;Soe;Pati;Tentena;Singaparna;Kalabahi;Luwuk;Tarogong;Banda Aceh;Saparua;Teminabuan;Gorontalo;Serang +IN;Ahmedabad;Shimla;Calicut;Jammu;Raipur;Chennai;New Delhi;Itanagar;Dehra Dun;Kolkata;Chandigarh;Jaipur;Thiruvananthapuram;Panchkula;India;Hyderabad;Bhopal;Bhubaneshwar;Kohima;Bangalore;Amaravati;Port Blair;Lucknow;Shillong;Aizawl;Dispur;Kavaratti;Mumbai;Daman;Gangtok;Imphal;Delhi;Srinagar;Agartala;Patna;Panaji;Ghandinagar;Ranchi;Puducherry;Madurai +CN;Changsha;Guangzhou;Fuqing;Dongyang;Yushan;Qingping;Leiyang;Hongshui;Shanhu;Meishan;Suileng;Gyegu;Quyang;Encheng;Weinan;Xushan;Tongliao;Qinzhou;Tengyue;Taiyuan;Tacheng;Tangdong;Puxi;Shuangcheng;Wencheng;Dexing;Beijing;Taohuajiang;Dongping;Jinhua;Wuzhong;Golmud;Boli;Yi Xian;Medog;Yatou;Chaozhou;Zalantun;Xigaze;Anqing;Wangqing;Zhijiang;Linkou;Zhamog;Zhugang;Fuding;Mishan;Jinbi;Longjiang;Neijiang;Yiwu;Fangchenggang;Bayan Hot;Beian;Yuquan;Urumqi;Mianyang;Luocheng;Jinshi;Xinpu;Huishi;Yilong;Hengzhou;Huanggang;Yichun;Tuodian;Zuitou;Xining;Jinan;Jinzhong;Zhongcheng;Lu'an;Dongxing;Yinggen;Fuyuan;Dadukou;Anlu;Weichanglu;Qingquan;Tangdukou;Dongta;Qingdao;Gaoyou;Dalain Hob;Yunzhong;Xiedian;Wuchang;Changde;Minzhu;Yicheng;Jiamusi;Qamdo;Loufan;Zhujiacun;Gutao;Shangzhi;Dianbu;Shacheng;Zhuji;Guigang;Qingan;Ji'an;Huazangsi;Leping;Changling;Dashiqiao;Mazhang;Tailai;Baishan;Jiangmen;Altay;Kaiyuan;Chang'an;Beihai;Suohe;Shenyang;Zhangye;Sanming;Guankou;Ma'erkang;Nanchang;Karakax;Rongcheng;Ningbo;Kaiyun;Linxi;Heshan;Xinyu;Kaifeng Chengguanzhen;Liujiaxia;Guang'an;Mengmeng;Tieli;Chengjiao;Lianzhou;Jian'ou;Yangjiang;Leizhou;Xianyang;Yian;Jianshe;Kunming;Deyang;Liuhe;Yangshe;Xiaping;Dongtai;Huquan;Fujin;Siyang;Yisuhe;Yanqi;Kaiwen;Lingshou;Hechi;Taixing;Jiancheng;Qinggang;Xireg;Yutan;Xinzhou;Laojiezi;Xiayang;Yancheng;Liaocheng;Ningde;Heihe;Gaizhou;Xibeijie;Helixi;Taicheng;Uchturpan;Jiaoxiyakou;Chongqing;Jiayuguan;Yanjiang;Nenjiang;Suonan;Xianshuigu;Yakou;Gexianzhuang;Qiqihar;Lanyi;Shangtianba;Laiyuan;Tianchang;Meizhou;Huichang;Haikou;Zequ;Shuanghe;Shijiazhuang;Mizhou;Luanzhou;Yunfu;Mali;Zanhuang;Hoh Ereg;Xishan;Fucheng;Huaiyang;Qionghu;Wuxi;Anguo;Xinhualu;Nehe;Xinxing;Weifen;Tekes;Longquan;Kuaidamao;Rizhao;Zhangzhou;Zhaozhou;Tongshan;Yongbei;Lecheng;Jinshan;Zhenzhou;Huili Chengguanzhen;Xiangyang;Muping;Baoding;Changzhou;Qianzhou;Huilong;Kaji;Dingzhou;Langzhong;Ailan Mubage;Menglie;Chizhou;Aral;Dianga;Aketao;Suzhou;Linjiang;Longxing;Jincheng;Wuyishan;Xiwanzi;Mangqu;Fenglu;Shishi;Zhenjiang;Nagqu;Gulou;Dongcun;Wanyuan;Zhangjiajie;Hailun;Zhaoqing;Bazhou;Zhuzhou;Zhuhai;Zhoushan;Lhasa;Qarqan;Cangzhou;Dengtalu;Pagqen;Chifeng;Yingshouyingzi;Hong'an;Huludao;Wuwei;Tafeng;Xiaqiaotou;Shaoxing;Chinggil;Chengde;Haomen;Sanhe;Gabasumdo;Changzhi;Keshan;Xinglong;Nanjian;Labuleng;Shanhe;Langfang;Jingdezhen;Shaoyang;Zhongwei;Yucheng;Hangzhou;Guixi;Tongjiang;Fengcheng;Lucheng;Licheng;Taozhou;Nansan;Jiangguanchi;Handan;Dongchuan;Tongyangdao;Xinshi;Mengdong;Youganning;Kaihua;Zhedao;Suining;Meicheng;Chengdu;Genhe;Ghulja;Wenlan;Jinghai;Jingzhou;Jiannan;Shiyan;Taihecun;Luzhou;Gongzhuling;Fengning;Hoxut;Xiongzhou;Hunchun;Fukang;Yinying;Yanji;Linquan;Huinan;Jingcheng;Wusong;Gyangze;Qingnian;Toqsu;Beidao;Qitai;Menglang;Huolu;Weihai;Zijinglu;Xingsha;Hegang;Tangxing;Lishui;Tianjin;Dongning;Dingcheng;Beichengqu;Wenxian Chengguanzhen;Magitang;Aba;Putian;Xicheng;Siping;Alashankou;Huazhou;Tongchuan;Qinhe;Xinyuan;Tanbei;Yuli;Suifenhe;Hejian;Dazhou;Lindong;Zhangjiakou;Chengguan;Chongshan;Sanjiang;Chengxiang;Wudalianchi;Hulin;Mabai;Nantong;Nangandao;Luofeng;Longtoushan Jiezi;Hai'an;Kargilik;Xincheng;Ulan Hua;Renqiu;Sanjiaocheng;Jin'e;Yakeshi;Anda;Yong'an;Daqing;Lianyuan;Qabqa;Xiangcheng;Linxia Chengguanzhen;Wuling;Baiyashi;Yishi;Peyziwat;Xisa;Taoyang;Fuzhou;Wutong;Bama;Yingcheng;Ning'an;Shengping;Shuangluan;Hezuo;Longchuan;Xiangjiaba;Cencheng;Huangshan;Shengli;Heze;Shaoguan;Zhongshan;Xincun;Donghua;Ducheng;Kuiju;Yuxi;Gannan;Dunhua;Pingliang;Yan'an Beilu;Baicheng;Shangrao;Shuozhou;Guozhen;Rongjiawan;Liulin;Lingbao Chengguanzhen;Leling;Zhangjiakou Shi Xuanhua Qu;Khutubi;Nanjing;Nada;Huangshi;Yulin;Oroqen Zizhiqi;Wulan;Ziketan;Lingquan;Hohhot;Dezhou;Guiping;Basuo;Jixi;Fu'an;Wuhai;Jiaji;Jiujiang;Cuihua;Yitiaoshan;Shenzhen;Dahuaishu;Koktokay;Yuanquan;Kumul;Zhuozhou;Jiangna;Xiashi;E'erguna;Xangda;Nong'an;Hotan;Xingcheng;Da'an;Zhuolu;Dingxi;Taihe;Meihekou;Jiaxing;Lanxi;Mudanjiang;Zhengding;Jinjiang;Yongqing;Jiantang;Rulin;Weiyuan;Wuyi;Zhaxi;Xinji;Wuhan;Pingxiang;Shanghai;Longjing;Zhoukou;Bamiantong;Laibin;Pudong;Wenping;Fancheng;Mositai;Yangquan;Puyang Chengguanzhen;Yingchuan;Luzhang;Aihua;Xiaoxita;Zhangping;Fuyang;Nanchong;Qingyang;Zhuangyuan;Jieyang;Miluo Chengguanzhen;Zhaoyu;Shuangqiao;Pingquan;Zhengzhou;Toksun;Tumen;Daiyue;Fangting;Botou;Wuzhou;Mengla;Chaigoubu;Tuncheng;Atushi;Kuqa;Heyin;China;Chenzhou;Sanya;Guiyang;Changting;Rongwo;Ruoqiang;Liaoyuan;Changchun;Baiyin;Ezhou;Huanghua;Karamay;Ji'an Shi;Yushu;Wuhu;Zhongba;Taizhou;Shulan;Longyan;Cili;Lushar;Gaozhou;Hengshui;Songyang;Jinchang;Helong;Shenzhou;Kuytun;Ganzhou;Luohe;Nilka;Puyang;Dali;Guilin;Qapqal;Nanping;Luancheng;Korla;Yingkou;Chengjiao Chengguanzhen;Dongguan;Zhengjiatun;Xihuachi;Xi'an;Harbin;Gar;Leshou;Baiquan;Yaofeng;Quanzhou;Lianran;Guma;Aksu;Xinhua;Gaobeidian;Shangpa;Shihezi;Lianshan;Aheqi;Yiyang;Wuxue;Hezhou;Mingxing;Jizhou;Wenchang;Baocheng;Chuimatan;Haicheng;Liancheng;Jinghong;Shaowu;Heyuan;Ma'anshan;Foshan;Qingyuan;Huzhou;Toli;Zunhua;Xiping;Turpan;Maoming;Huizhou;Hekou;Hefei;Chongzuo;Yanghe;Lechang;Guangming;Yibin;Wancheng;Hongliuwan;Yinchuan;Zhaodong;Lianhe;Kangbao;Qagan Us;Gaoping;Simao;Xichang;Lin'an;Jishi;Huangzhai;Suihua;Xiluodu;Hailin;Zhanjiang;Qincheng;Xiamen;Kashgar;Qiaotou;Nanning;Shantou;Fusui;Gaocheng;Zhanggu;Chuqung;Liuzhou;Shuangyashan;Jingping;Longhua;Shanwei;Shazhou;Lanzhou;Rucheng;Jinhe;Yunxian Chengguanzhen +PH;Ilagan;Poblacion;Baguio City;Kidapawan;Kalibo;Marikina City;Cebu City;Jordan;Perez;Prosperidad;Agdangan;Hinatuan;Mambajao;Iligan;Calapan;Puerto Princesa;Candelaria;Anda;Taguig City;Navotas;Roseller Lim;Pasay City;Tabuk;Jolo;Caloocan City;Cotabato;Tagbilaran City;Davao;Ormoc;Isulan;Tandag;Tuguegarao;Kabugao;Lucena;Mandaluyong City;Tingloy;Laoag;San Antonio;Malaybalay;Monkayo;Maasin;San Juan;Makati City;Butuan;La Trinidad;Iloilo;Datu Odin Sinsuat;Manila;Quezon City;Santiago;Tiaong;Pili;San Jose;Trece Martires City;Legazpi City;Malita;Mamburao;Argao;Valenzuela;Maribojoc;Angeles City;Magalang;Mati;Pagadian;Banaybanay;Muntinlupa City;City of Isabela;Batangas;Dumaguete City;Badian;Baclayon;Talipao;President Quirino;Balanga;Buluan;Vigan;Santa Cruz;Pasig City;Digos;Barobo;Surigao;Oroquieta;Naga City;Kabasalan;Buguey;Boac;Padre Burgos;Tarlac City;Daet;Iba;Malolos;Lapu-Lapu City;Catbalogan;Cabarroguis;Mandaue City;Dauis;Olongapo;Alabel;Marawi City;Bangued;Lagawe;Claveria;Cabadbaran;Naval;Baler;Malabon;Roxas City;Sorsogon;Tagum;San Fernando;Catarman;Dipolog;Koronadal;Bontoc;Siquijor;Philippines;Tacloban;Corella;Palayan City;Basco;Lianga;Tubod;Virac;Bayombong;Panay;Borongan;Zamboanga City;Cagayan de Oro;Romblon;Bacolod;City of Paranaque;Masbate;Antipolo;Nabunturan;Ipil;Dalaguete;Bongao;Lupon;Dagupan City;Alcantara;General Santos;Lingayen +BR;Lagoa do Mato;Sousa;Neopolis;Pirai do Norte;Limoeiro do Norte;Anastacio;Frecheirinha;Varjota;Rio Tinto;Palmeira dos Indios;Cha Grande;Balsas;Araioses;Aracati;Remigio;Floriano;Ico;Guamare;Itiquira;Pedras de Fogo;Ipiau;Conceicao de Jacuipe;Jucas;Cha da Alegria;Itapecuru Mirim;Acopiara;Goianinha;Pao de Acucar;Manaquiri;Paramoti;Colina;Jucurutu;Rio Formoso;Coari;Rio Real;Sao Felix;Aragoiania;Ararenda;Itapiranga;Baiao;Patu;Amapa;Itubera;Atalaia;Passagem Franca;Uropa;Caem;Carnaubal;Gurupi;Alta Floresta;Sao Tome;Jaru;Guajara-Mirim;Primavera;Macajuba;Dianopolis;Queimadas;Madalena;Fonte Boa;Morro Agudo;Morada Nova;Pastos Bons;Goias;Tapiramuta;Acarau;Atalaia do Norte;Cristinapolis;Aratuba;Sao Francisco;Mazagao;Viseu;Aracaju;Feira de Santana;Penedo;Cuite;Parnarama;Boqueirao;Barreira;Conceicao das Alagoas;Juazeirinho;Colorado do Oeste;Brazil;Canguaretama;Lucena;Sapeacu;Feijo;Sao Francisco do Maranhao;Ielmo Marinho;Barra;Santa Quiteria do Maranhao;Guaraci;Castanhal;Grajau;Edeia;Passira;Pimenta Bueno;Nazare da Mata;Acari;Escada;Codajas;Itagi;Beneditinos;Palmacia;Japura;Tracuateua;Ji-Parana;Jaragua;Formosa do Rio Preto;Sao Joao Batista;Santa Luzia do Itanhy;Soledade;Florania;Tuntum;Ibiraci;Altos;Cupira;Lagoa dos Gatos;Luis Correia;Uarini;Lagoa da Confusao;Florianopolis;Alhandra;Baixa Grande;Anguera;Araxa;Mineiros;Pocao de Pedras;Penalva;Joaquim Gomes;Nova Olinda;Mansidao;Governador Dix-Sept Rosado;Simoes Filho;Tucuma;Arari;Varzedo;Sao Pedro do Piaui;Joao Camara;Natividade;Lavras da Mangabeira;Alcantaras;Santana do Cariri;Jaguaretama;Brasnorte;Barao do Grajau;Aragarcas;Itainopolis;Santa Isabel do Para;Carauari;Nina Rodrigues;Nossa Senhora dos Milagres;Sao Miguel das Matas;Aratuipe;Baturite;Colares;Pianco;Messias;Pires Ferreira;Moreno;Campo Maior;Ibateguara;Tapaua;Capao da Canoa;Augustinopolis;Manoel Vitorino;Palhano;Cacu;Torres;Aguas Belas;Ibipetuba;Paulino Neves;Ubata;Tibau do Sul;Capanema;Itumbiara;Palmares;Olimpia;Quixada;Frutal;Tome-Acu;Cururupu;Vitoria;Xapuri;Paripiranga;Paragominas;Juripiranga;Wagner;Marapanim;Rio de Janeiro;Meruoca;Sao Francisco do Conde;Itapipoca;Capim;Sao Jose da Coroa Grande;Canutama;Serra Branca;Colonia Leopoldina;Riachao das Neves;Calcoene;Matoes;Madre de Deus;Santa Ines;Candido Mendes;Valente;Umbauba;Joao Pessoa;Craibas;Urucurituba;Buriti Alegre;Orobo;Goiania;Jaboatao dos Guararapes;Vargem Grande;Sao Bento;Autazes;Coelho Neto;Mauriti;Parnaiba;Crateus;Mata de Sao Joao;Guara;Alto Santo;Mocajuba;Quipapa;Campo Novo do Parecis;Maragogipe;Iraucuba;Indiaroba;Sacramento;Coracao de Maria;Nova Crixas;Arara;Barauna;Formoso do Araguaia;Puxinana;Saubara;Serra Preta;Pedro Velho;Morro do Chapeu;Lagoa do Ouro;Barra de Santa Rosa;Manacapuru;Tiangua;Anori;Sao Miguel do Guapore;Ipojuca;Caninde;Uniao dos Palmares;Jandaira;Sao Vicente Ferrer;Barreirinha;Capitao Poco;Pauini;Alianca;Russas;Maranguape;Baia da Traicao;Aracoiaba;Itamaraca;Gudofredo Viana;Campos Lindos;Araguaina;Santa Rita;Pindare-Mirim;Piracanjuba;Presidente Dutra;Bequimao;Piracuruca;Mancio Lima;Iati;Pacatuba;Paranatama;Buriti do Tocantins;Santo Estevao;Itaberai;Boca da Mata;Tururu;Irara;Pio IX;Amargosa;Altamira;Aracagi;Maraial;Pitimbu;Ceres;Alto Araguaia;Recife;Vicentinopolis;Bela Cruz;Careiro;Alcantara;Mucambo;Angicos;Cruz das Almas;Irituia;Flexeiras;Sao Jose de Mipibu;Curitiba;Araguatins;Olho d'Agua das Cunhas;Moita Bonita;Pintadas;Alto Longa;Campinorte;Varzea Grande;Caxias;Xinguara;Limoeiro de Anadia;Estreito;Igaci;Senador Guiomard;Carire;Reriutaba;Sao Bernardo;Antonio Cardoso;Urucara;Dois Riachos;Areia Branca;Uruara;Belem de Maria;Jatai;Vitoria do Mearim;Novo Aripuana;Mutuipe;Salitre;Gravata;Cajazeiras;Muritiba;Barrocas;Cerejeiras;Bujaru;Joao Alfredo;Maravilha;Portel;Timbauba;Cassia;Conde;Sao Jose do Egito;Boa Nova;Pedra Branca;Vicencia;Orlandia;Mairi;Ararangua;Rio Maria;Borba;Turiacu;Bacabal;Angelim;Santa Juliana;Jacobina;Bacuri;Alto Alegre dos Parecis;Umarizal;Maraba;Coroata;Rio Preto da Eva;Passa e Fica;Conceicao do Almeida;Ibirataia;Sao Luis do Quitunde;Tocantinopolis;Tonantins;Boa Vista;Novo Airao;Itapissuma;Cumaru;Alagoa Grande;Miranorte;Guaira;Itapororoca;Tupanatinga;Monte Alegre;Barretos;Mundo Novo;Tejucuoca;Patrocinio Paulista;Macapa;Presidente Vargas;Currais Novos;Croata;Anapurus;Nilo Pecanha;Senador Jose Porfirio;Ipiranga do Piaui;Porto de Pedras;Paco do Lumiar;Chapada dos Guimaraes;Itamarati;Santo Antonio;Gloria do Goita;Santo Antonio do Ica;Vila do Conde;Jericoacoara;Serrolandia;Sao Joaquim da Barra;Poranga;Centralina;Euclides da Cunha;Braganca;Sao Caetano de Odivelas;Ibicuitinga;Pocone;Bananeiras;Axixa do Tocantins;Cruzeiro do Sul;Sao Sebastiao do Uatuma;Amarante;Caldeirao Grande;Barreirinhas;Costa Marques;Santa Teresinha (2);Brejo do Cruz;Catende;Diamantino;Codo;Esperanca;Maxaranguape;Guiratinga;Pontalina;Nova Brasilandia d'Oeste;Aquidaba;Macaiba;Jaguaribara;Ribeiropolis;Belo Horizonte;Buriti;Ananas;Camocim;Camocim de Sao Felix;Rosario Oeste;Porto Calvo;Nova Olinda do Norte;Nova Cruz;Satuba;Luzilandia;Igarape Grande;Santaluz;Franca;Barro;Goiana;Bom Lugar;Obidos;Aurelino Leal;Alvorada;Vargem da Roca;Bayeux;Areia;Santa Cruz;Ubaira;Primeira Cruz;Anama;Garanhuns;Januario Cicco;Itatira;Chaval;Apodi;Cruz;Ubaitaba;Sao Francisco do Guapore;Coxim;Nazare;Corumba;Gameleira;Mogeiro;Marcacao;Campina Grande;Imperatriz;Itaberaba;Ipora;Acara;Ibirapitanga;Alagoinha;Amaraji;Santana do Matos;Elesbao Veloso;Santanopolis;Santa Helena;Bom Jardim;Itapuranga;Aveiro;Miracema do Tocantins;Itapiuna;Umirim;Araruna;Cuiaba;Buritirama;Olivenca;Morrinhos;Pendencias;Cedro;Teixeira;Ouro Branco;Bom Conselho;Nova Xavantina;Sena Madureira;Bonito de Santa Fe;Santa Quiteria;Guarai;Paripueira;Urucui;Cujubim;Humberto de Campos;Ladario;Quijingue;Agua Branca;Cacoal;Presidente Medici;Cruz do Espirito Santo;Mara Rosa;Agua Azul do Norte;Ipixuna;Presidente Figueiredo;Prata;Tomar do Geru;Cerro Cora;Parelhas;Alta Floresta D'Oeste;Nossa Senhora das Dores;Jussara;Hidrolandia;Rondonopolis;Pindoretama;Jiquirica;Itaparica;Itupiranga;Piripiri;Joao Lisboa;Saude;Dona Ines;Sao Gabriel da Cachoeira;Caracarai;Lajes;Inga;Mozarlandia;Mulungu;Neropolis;Campos Sales;Pacajus;Catole do Rocha;Juara;Caruaru;Santo Antonio do Taua;Oiapoque;Gurupa;Agua Preta;Nova Floresta;Itabaiana;Iranduba;Sao Luis do Curu;Maurilandia;Pentecoste;Cabaceiras do Paraguacu;Caiaponia;Oeiras;Bujari;Vicosa do Ceara;Itaituba;Mombaca;Ipanguacu;Cansancao;Aiuaba;Goiatins;Tenente Ananias Gomes;Sinop;Saboeiro;Itajiba;Granja;Uruacu;Rio Verde;Pe de Serra;Sao Mateus do Maranhao;Iguatu;Sao Lourenco da Mata;Cachoeira;Mujui dos Campos;Varzea Nova;Salgado de Sao Felix;Alenquer;Crato;Normandia;Quatipuru;Varzea Alegre;Cachoeira dos Indios;Governador Archer;Aldeias Altas;Tramandai;Jurua;Ponto Novo;Nova Timboteua;Carnaubais;Sao Jose de Piranhas;Santo Antonio do Leverger;Sonora;Alvorada D'Oeste;Salinopolis;Inhangapi;Jacarau;Milha;Barras;Barra de Santo Antonio;Ribeirao;Estancia;Uirauna;Ipueiras;Iacu;Taquarana;Cairu;Pilar;Senador Canedo;Camacari;Senador Pompeu;Ibia;Monte Alegre de Sergipe;Silves;Aroeiras;Jaciara;Gurinhem;Serra Caiada;Terra de Areia;Mossoro;Gandu;Batalha;Extremoz;Pedro II;Alvaraes;Capistrano;Firminopolis;Esplanada;Santa Isabel do Rio Negro;Coreau;Peritoro;Prainha;Igrapiuna;Timbiras;Joaquim Pires;Barra do Corda;Ferreiros;Sape;Boa Vista do Tupim;Itapage;Afua;Natuba;Acu;Maracanau;Ipua;Tambe;Pombos;Marechal Taumaturgo;Ipecaeta;Colmeia;Jequie;Solanea;Marco;Tamboril;Lagoa de Itaenga;Ibaretama;Carira;Manaus;Patos;Vigia;Miraima;Nhamunda;Pedro Afonso;Esperantina;Porto de Moz;Sao Luis Gonzaga do Maranhao;Branquinha;Olinda;Quebrangulo;Lagoa Seca;Quixere;Beberibe;Filadelfia;Sao Paulo;Oriximina;Matias Olimpio;Ouro Preto d'Oeste;Frei Paulo;Castro Alves;Faro;Porto Alegre;Malhador;Sucupira do Norte;Forquilha;Nossa Senhora da Gloria;Uruana;Rosario;Nossa Senhora dos Remedios;Picui;Cajueiro;Capela do Alto Alegre;Tracunhaem;Sao Miguel de Touros;Duque Bacelar;Assare;Axixa;Miguel Calmon;Rialma;Maragogi;Rondon do Para;Presidente Kennedy;Arapiraca;Paulista;Benevides;Uruoca;Pau dos Ferros;Mirangaba;Uberaba;Chorozinho;Rio Branco;Guadalupe;Pombal;Piacabucu;Brejinho;Piritiba;Santo Antonio de Jesus;Maues;Itiuba;Ipira;Capela;Santana;Boca do Acre;Riachao do Jacuipe;Choro;Trindade;Natal;Nova Russas;Ipu;Machados;Quixelo;Campo Grande;Trairi;Porto Nacional;Comodoro;Araripe;Abreu e Lima;Sao Joao dos Patos;Caapora;Miranda;Lauro de Freitas;Coite do Noia;Barra do Garcas;Espigao D'Oeste;Pirapemas;Pedregulho;Porto;Placido de Castro;Jaguaribe;Bom Jesus;Fortaleza;Lagoa do Carro;Itirucu;Sao Raimundo das Mangabeiras;Inhumas;Itatim;Tucurui;Estrela de Alagoas;Catarina;Barcelos;Brejoes;Sao Goncalo dos Campos;Anadia;Imaculada;Igarassu;Ariquemes;Amontada;Sao Felipe;Canhotinho;Catunda;Goianira;Picos;Sao Felix do Xingu;Itacare;Itapetim;Palmeiras de Goias;Catu;Anajas;Camamu;Limoeiro;Sao Miguel do Araguaia;Carmo do Rio Verde;Caucaia;Sao Joao;Terra Santa;Pio XII;Passos;Campo Formoso;Nossa Senhora do Livramento;Valenca;Careiro da Varzea;Goianapolis;Colinas do Tocantins;Acarape;Barroquinha;Jardim do Serido;Augusto Correa;Guapo;Porto Acre;Tres Cachoeiras;Caridade;Brejo Santo;Alagoa Nova;Sorriso;Matriz de Camarajibe;Canapolis;Maceio;Camarajibe;Fortim;Aracas;Maruim;Delta;Soure;Terra Alta;Colider;Demerval Lobao;Porangatu;Itau de Minas;Belterra;Ipaporanga;Palmeirais;Japaratinga;Sao Sebastiao do Passe;Itaporanga;Porto Franco;Manicore;Salvaterra;Pindobacu;Coremas;Guimaraes;Abaiara;Ceara-Mirim;Rio do Fogo;Seringueiras;Tamandare;Antenor Navarro;Loreto;Campo do Brito;Marau;Jitauna;Montes Altos;Sirinhaem;Missao Velha;Pereiro;Ruy Barbosa;Paraipaba;Porto Alegre do Norte;Caraubas;Touros;Sao Paulo de Olivenca;Wanderlandia;Nazaria;Palmas;Pinheiro;Massaranduba;Ipaumirim;Porto Valter;Sao Domingos do Maranhao;Jutai;Anajatuba;Labrea;Aramari;Carneiros;Confresa;Utinga;Humaita;Cascavel;Buriti dos Lopes;Cocal;Planura;Carutapera;Porto Grande;Aurora;Santo Amaro;Cajari;Chapadinha;Melgaco;Curuca;Dario Meira;Matinha;Amatura;Luis Gomes;Itarema;Anicuns;Eusebio;Massape;Vila Rica;Afonso Bezerra;Monsenhor Gil;Rolim de Moura;Santa Maria do Para;Fronteiras;Itabaianinha;Macaparana;Barreiros;Paraibano;Vitoria de Santo Antao;Timon;Apuiares;Serrinha;Magalhaes de Almeida;Jacareacanga;Acailandia;Viana;Moncao;Peri-Mirim;Curua;Mata Roma;Sao Joaquim do Monte;Mirador;Sao Jose da Tapera;Xexeu;Tartarugalzinho;Dias d'Avila;Capitao de Campos;Guarabira;Banabuiu;Belem;Nossa Senhora Aparecida;Uruburetama;Jacunda;Colinas;Maracacume;Agrestina;Icatu;Pacoti;Sao Benedito;Maracas;Buenos Aires;Sao Jose de Ribamar;Jaicos;Salinas da Margarida;Conceicao do Coite;Valinhos;Lucas do Rio Verde;Sao Luis de Montes Belos;Vera Cruz;Dom Pedro;Oros;Jardim de Piranhas;Martinopole;Nisia Floresta;Marechal Deodoro;Sao Sebastiao de Lagoa de Roca;Ocara;Brasil Novo;Campo Redondo;Santana do Ipanema;Ibiapina;Santa Teresinha;Cedral;Nova Ipixuna;Regeneracao;Groairas;Almeirim;Graca;Goiatuba;Mamanguape;Cortes;Paraiso do Tocantins;Boa Viagem;Taperoa;Benjamin Constant;Baia Formosa;Brasilia;Jaqueira;Parnamirim;Quirinopolis;Tefe;Mirinzal;Ourem;Caapiranga;Itapitanga;Farias Brito;Miguelopolis;Olho d'Agua das Flores;Cacimba de Dentro;Nobres;Conceicao da Feira;Itaitinga;Itaquitinga;Pirpirituba;Conceicao do Araguaia;Jose de Freitas;Barbalha;Cacimbinhas;Maribondo;Pau d'Alho;Juazeiro do Norte;Itapaci;Santa Luzia;Corrente;Capinopolis;Marcolandia;Jaguaquara;Aquiraz;Poco Branco;Quixeramobim;Mirante da Serra;Caico;Condado;Simao Dias;Paracuru;Redencao;Aquidauana;Santana do Acarau;Poxoreo;Rubiataba;Monsenhor Tabosa;Itatuba;Laje;Bonito;Macau;Pocinhos;Carius;Cabedelo;Chupinguaia;Capim Grosso;Alto do Rodrigues;Urbano Santos;Caririacu;Terra Boa;Araua;Icapui;Ourilandia do Norte;Piquet Carneiro;Uniao;Grossos;Buriti Bravo;Eirunepe;Sao Miguel do Guama;Sao Luis;Alexandria;Manuel Urbano;Alto Garcas;Alagoinhas;Upanema;Miguel Alves;Sao Pedro da Agua Branca;Carpina;Santa Helena de Goias;Fortaleza dos Nogueiras;Chaves;Montanhas;Jaguaripe;Amarante do Maranhao;Nova Ubirata;Saire;Igarape-Acu;Santo Antonio dos Lopes;Rosario do Catete;Potengi;Juina;Sao Jose do Campestre;Teresina;Santa Terezinha de Goias;Sao Goncalo do Amarante;Nova Veneza;Saloa;Joaquim Nabuco;Pedra Preta;Itacoatiara;Jaguaruana;Sao Miguel;Sao Felix do Araguaia;Solonopole;Aripuana;Guaraciaba do Norte;Sao Paulo do Potengi;Crixas;Iraci;Valenca do Piaui;Maracana;Sao Geraldo do Araguaia;Tabuleiro do Norte;Candeias;Fortuna;Tarauaca;Ubajara;Piranhas;Parintins;Cidelandia;Cantanhede;Tutoia;Tabatinga;Beruri;Porto Velho;Novo Lino;Fagundes;Major Isidoro;Sao Benedito do Rio Preto;Poco das Trincheiras;Quixabeira;Caetes;Goianesia;Sao Jose da Laje;Sobral;Vilhena;Morros;Igarapava;Itaiba;Brasileia;Nordestina;Ares;Santa Maria das Barreiras;Rio Largo;Correntes;Inhuma;Passo de Camarajibe;Salvador;Brejao;Brejo +KR;Cheongju;Sejong;Daegu;Pocheon;Andong;Jeju;Hongseong;Gimpo;Muan;Jeonju;Changwon;Heunghae;Haeryong;Busan;Daejeon;Gangneung;Korea, South;Bucheon;Mokpo;Chuncheon;Goyang;Paju;Suncheon;Guryongpo;Suwon;Incheon;Boryeong;Sokcho;Naju;Yangsan;Gwangyang;Gwangju;Pohang;Seoul;Ulsan;Yeosu +MX;La Resolana;Nacajuca;Colotlan;Emiliano Zapata;Tlacolula de Matamoros;Coyotepec;Rioverde;Xicotencatl;Amatitan;Chiconcuac;Rafael Delgado;Coacoatzintla;Cuautla;Choix;Ciudad Mante;Tacambaro de Codallos;Quiroga;Carlos A. Carrillo;Ciudad Altamirano;Benito Juarez;Acatic;La Huerta;Tuxtla;Cocotitlan;Acatzingo;Aguascalientes;Tanhuato de Guerrero;Texcatepec;Cardenas;San Jose del Cabo;Moloacan;Ciudad Madero;Juan Aldama;Cuquio;La Trinitaria;Rayon;Ojinaga;Jalapa;Jalpa de Mendez;Perote;Union de San Antonio;Zacatelco;Huandacareo;Ayutla de los Libres;Ixtlahuaca;Candelaria;Santa Ana Jilotzingo;Paraiso;Polotitlan de la Ilustracion;Morelia;Orizaba;Camargo;Acambaro;Santo Domingo Tehuantepec;Tecoanapa;San Pedro Mixtepec;San Andres Timilpan;Jose Maria Morelos;Chiapa de Corzo;Poza Rica de Hidalgo;Temamatla;Tlajomulco de Zuniga;Guadalajara;Candelaria Loxicha;Puerto Penasco;Silao;Chalco;Cuajinicuilapa;Salvatierra;Churumuco de Morelos;San Pedro Garza Garcia;Soledad de Graciano Sanchez;Santiago;Turicato;Escarcega;Chilapa de Alvarez;Zapotlan del Rey;La Huacana;Mexico City;Tamuin;Las Rosas;Capulhuac;Huauchinango;Petatlan;Navolato;Zontecomatlan de Lopez y Fuentes;Sultepec;Teotitlan;Pajapan;Cuencame de Ceniceros;San Francisco del Rincon;Tampamolon Corona;San Julian;Chimalhuacan;Macuspana;Jilotlan de los Dolores;Mineral del Monte;Tecamachalco;Empalme;Tixkokob;Nautla;Guanajuato;Salamanca;Motozintla;Ascension;Citlaltepec;Platon Sanchez;Heroica Nogales;Zapotlan de Juarez;El Pueblito;Teopisca;Atlatlahucan;Cerritos;Valle Hermoso;Coyuca de Catalan;Sonoita;Puebla;Cuernavaca;San Felipe del Progreso;Ixtaczoquitlan;Uriangato;Cochoapa el Grande;Santa Isabel Cholula;Salinas de Hidalgo;Ciudad de Atlixco;Valle de Bravo;Cunduacan;Yuriria;Santa Maria Huatulco;Abasolo;Ixtlahuacan del Rio;Cananea;Vicente Guerrero;Tlacolulan;Altepexi;Tapachula;Nanchital de Lazaro Cardenas del Rio;Ensenada;Ciudad Serdan;Tlalpujahua de Rayon;Jimenez;Tlalnepantla;Tonala;Chiconquiaco;Filomeno Mata;Magdalena de Kino;Coquimatlan;Reynosa;Ciudad Guzman;Acapetahua;Garcia;La Paz;Texistepec;Ocotlan de Morelos;Cosio;Ocuilan de Arteaga;Chilcuautla;San Andres Tuxtla;Doctor Mora;Magdalena;Tecolotlan;Arriaga;Felipe Carrillo Puerto;Nuevo Casas Grandes;San Pedro Pochutla;Tolcayuca;Juchitan de Zaragoza;Tamazunchale;Tejupilco;San Felipe;Valle Nacional;Tala;Villahermosa;San Luis Potosi;Tizimin;Huasca de Ocampo;Nacozari de Garcia;Santa Cruz Atizapan;Ciudad Guadalupe Victoria;Tehuacan;Altamira;Tlapa de Comonfort;Tecate;Tepetzintla;Jiutepec;Ocosingo;Mazapil;Jiquipilas;Encarnacion de Diaz;Etchojoa;San Pablo Huixtepec;Huejucar;Naucalpan de Juarez;Suchiapa;La Cruz;Saucillo;Yecuatla;Chicoloapan;Tres Valles;Delicias;Oteapan;Mazatlan Villa de Flores;Amatlan de los Reyes;Pinotepa;Zapotiltic;Jonuta;Jocotepec;Charcas;El Espinal;Jesus Maria;San Ignacio Cerro Gordo;Venustiano Carranza;Soledad Atzompa;El Rosario;Hecelchakan;Huejotzingo;Soledad de Doblado;Ayutla;Jaltenco;Tepotzotlan;Yahualica de Gonzalez Gallo;Celaya;Putla Villa de Guerrero;Erongaricuaro;Ciudad Santa Catarina;Tomatlan;Villa Corona;Torreon;San Juan del Rio del Centauro del Norte;San Buenaventura;Pantelho;Atoyac de Alvarez;Tlacotepec;Cozumel;Xico;Monclova;Apizaco;Coatepec;Mecatlan;Uman;Castillo de Teayo;Zimatlan de Alvarez;Ciudad Cuauhtemoc;Acambay;Guasave;San Pedro Jicayan;Tixtla de Guerrero;Espita;Jocotitlan;Ignacio de la Llave;Tepic;Joquicingo;Valparaiso;Campeche;Muna;Playa del Carmen;San Juan del Rio;Xalapa;Mazatan;Sabinas;Santiago Suchilquitongo;Guamuchil;Mineral de Angangueo;Temascalapa;Paso del Macho;Nochistlan de Mejia;Ixhuatlan de Madero;Cuautepec de Hinojosa;Hidalgotitlan;Tecamac;Meoqui;Ometepec;Zumpango del Rio;Champoton;Asuncion Nochixtlan;Texcoco;Ozuluama de Mascarenas;Acanceh;Halacho;Hueyotlipan;Tecali;Rincon de Romos;Mazatlan;Villa Luvianos;Cacahoatan;Tezontepec;San Nicolas de los Ranchos;Tatahuicapan;Lagos de Moreno;Colima;Soyalo;Isla Mujeres;Ixtapaluca;Galeana;Ciudad Benito Juarez;Yecapixtla;Durango;Huamantla;Ezequiel Montes;Juarez;Acatlan;San Pedro Huamelula;Actopan;Tepalcatepec;Villaflores;Monterrey;Playas de Rosarito;Tetela de Ocampo;Tepetlixpa;Texcaltitlan;Jamapa;Almoloya de Alquisiras;Zacualpan;Kanasin;Ozumba;San Blas;Queretaro;Degollado;Jaral del Progreso;Tecuala;Tzintzuntzan;Juventino Rosas;Ciudad Apodaca;Balancan;San Jose Villa de Allende;Ixhuatlancillo;Valle de Santiago;Santa Maria Jacatepec;Ayapango;Nuevo Laredo;Cotija de la Paz;Senguio;Mariano Escobedo;Tapalpa;Zacatecas;Cuitzeo del Porvenir;Frontera;Cerro Azul;Totolapan;Ciudad Miguel Aleman;Santa Maria Chilchotla;Tantoyuca;Union de Tula;San Fernando;Pahuatlan de Valle;Apaseo el Alto;San Jose del Rincon Centro;Toluca;Zapotlanejo;Cosamaloapan;Cadereyta Jimenez;Amatan;Huehuetoca;Chichihualco;Ramos Arizpe;Ocoyoacac;Santiago Tulantepec;Lerma;Ilamatlan;La Independencia;San Carlos Yautepec;Jiquilpan de Juarez;Ciudad de Allende;Panuco;San Miguel de Allende;Chanal;Zacatepec;Soyaniquilpan;Mexicaltzingo;Pachuca;Tamiahua;Ciudad Melchor Muzquiz;Heroica Guaymas;Soteapan;Simojovel de Allende;Guachochi;Naolinco de Victoria;Talpa de Allende;Fortin de las Flores;Coatepec Harinas;Tequila;Tepatitlan de Morelos;Calimaya;Cuilapan de Guerrero;Boca del Rio;Ciudad Hidalgo;Jaltipan de Morelos;Navojoa;Ciudad Lerdo;Atenco;Pijijiapan;Tenancingo;Tehuipango;Ciudad Acuna;Santa Ana Maya;San Pablo Villa de Mitla;Agua Prieta;Huatabampo;Calvillo;Huixcolotla;Ahualulco de Mercado;Almoloya;Tototlan;Cuatro Cienegas de Carranza;San Rafael;General Panfilo Natera;Veracruz;Ameca;Coatzacoalcos;Acaxochitlan;Gomez Palacio;Dolores Hidalgo Cuna de la Independencia Nacional;Tlalixcoyan;Jalacingo;Ocozocoautla de Espinosa;Acala;Manlio Fabio Altamirano;Tenango del Aire;Los Reyes de Juarez;Zaragoza;Santa Maria Xadani;Ixmiquilpan;Juchipila;Nuevo San Juan Parangaricutiro;Pihuamo;Compostela;Mazamitla;Pabellon de Arteaga;Pesqueria;Salinas Victoria;Tlahualilo de Zaragoza;Tlaxcala;Santiago Tuxtla;Etzatlan;Matamoros;Santa Maria Colotepec;Ecatzingo;Ursulo Galvan;Huehuetla;Parras de la Fuente;Manzanillo;Chavinda;Amatepec;Ciudad Constitucion;Piedras Negras;Tulum;Escuintla;Tijuana;Villa Aldama;Cosautlan;Acapulco de Juarez;Zinapecuaro;Alamo;Fresnillo;Tierra Colorada;Iguala de la Independencia;Coroneo;Alamos;San Mateo Atenco;Tulancingo;Guadalupe;Escuinapa;Cueramaro;Aldama;Huatusco;Mexicali;Huanimaro;Tuxpan;Monte Escobedo;Tlalnelhuayocan;San Luis Rio Colorado;Jose Cardel;Temascalcingo;Santiago Papasquiaro;Cuauhtemoc;Huixquilucan;Malinalco;Ciudad Rio Bravo;Zumpango;Chilon;Temoaya;Tezoyuca;Jamay;Comapa;Cuichapa;Comala;Teocuitatlan de Corona;Pichucalco;Amacuzac;Jerez de Garcia Salinas;Tlaquepaque;Santa Rosalia;Tepatlaxco;Tuxpam de Rodriguez Cano;Hueyapan de Ocampo;Atlautla;Chihuahua;Tepoztlan;Ciudad Sabinas Hidalgo;Chilpancingo;Parral;Villanueva;Papantla de Olarte;Ejutla de Crespo;Zempoala;Heroica Caborca;Mexico;Villa Purificacion;Cheran;Hunucma;Las Margaritas;Taxco de Alarcon;Villa de Zaachila;Angel R. Cabada;Cuetzalan;Ciudad General Escobedo;Nava;Tlachichilco;Melchor Ocampo;El Salto;Santa Maria del Tule;San Cristobal;Mecayapan;Maxcanu;Cuapiaxtla de Madero;Miacatlan;Tapilula;Comitan;Tempoal de Sanchez;Altamirano;Zacatlan;Comalapa;Zozocolco de Hidalgo;Merida;Puente Nacional;Ixhuatlan del Sureste;Los Mochis;Chapala;Tepetlan;Linares;Juchique de Ferrer;Amatenango del Valle;Santa Barbara;Puerto Vallarta;Zacoalco de Torres;San Gregorio Atzompa;Tlalmanalco;Villagran;Misantla;San Salvador;Tecoman;Tultepec;Coalcoman de Vazquez Pallares;Calpulalpan;Xalatlaco;Almoloya del Rio;Miahuatlan;Teoloyucan;Tlayacapan;San Pedro;Teapa;Playa Vicente;Mascota;Jalpan;Apan;Leon de los Aldama;Huaquechula;Villa Victoria;Las Vigas de Ramirez;Loreto;Santa Ana Chiautempan;Axochiapan;Cihuatlan;San Jose de Gracia;Bochil;Apaxco de Ocampo;Tlaltetela;Yajalon;Huixtla;Irapuato;Cienega de Flores;Nezahualcoyotl;Hostotipaquillo;Singuilucan;Chinampa de Gorostiza;Santa Maria Jalapa del Marques;Zongolica;San Juan Cancuc;San Jose Tenango;Santiago Tianguistenco;San Bartolo Tutotepec;Zihuatanejo;Ojuelos de Jalisco;San Martin de las Piramides;Tultitlan de Mariano Escobedo;Cintalapa de Figueroa;Tenosique;China;Ciudad de Huajuapam de Leon;Sayula de Aleman;Minatitlan;Tequixquiac;Villa Hidalgo;Mixtla de Altamirano;Doctor Arroyo;Tataltepec de Valdes;Sahuayo de Morelos;Progreso;Ciudad Obregon;Miahuatlan de Porfirio Diaz;Tizayuca;Santiago Ixcuintla;Cuautitlan Izcalli;Ciudad de Huitzuco;Reforma;Apaseo el Grande;San Francisco de los Romo;Isla;El Fuerte;Temascaltepec de Gonzalez;El Oro de Hidalgo;Sayula;Villa Donato Guerra;Patzcuaro;Uruapan;San Juan de los Lagos;Tetela del Volcan;Tepeapulco;Chalma;Culiacan;Cortazar;Tecolutla;Alvarado;San Juan Evangelista;Chapa de Mota;Jamiltepec;Ahumada;Cancun;Ixtlahuacan de los Membrillos;Tezoatlan de Segura y Luna;Pantepec;Francisco I. Madero;Vista Hermosa de Negrete;Oxchuc;Metepec;Tlacoachistlahuaca;Hermosillo;Castanos;Matehuala;Canatlan;Ixtapan de la Sal;Amecameca de Juarez;Naranjos;Ciudad Lopez Mateos;Pueblo Nuevo;Berriozabal;Zinacantepec;San Juanito de Escobedo;Maravatio de Ocampo;Ayotoxco de Guerrero;Villa Juarez;Ocotlan;Tecpan de Galeana;Amatenango de la Frontera;Chocaman;Comalcalco;Ciudad Manuel Doblado;Xochistlahuaca;Montemorelos;Tzitzio;Contla;Cruz Grande;Juanacatlan;Jilotepec;Allende;Axapusco;Huejuquilla el Alto;San Blas Atempa;Zacualpan de Amilpas;Tepechitlan;Totutla;Santa Ana Nextlalpan;San Martin Texmelucan de Labastida;La Piedad;Jaltenango;Nicolas Romero;Tezonapa;Atotonilco el Alto;Gonzalez;Tlalixtac de Cabrera;Cuautitlan;Cazones de Herrera;Santo Tomas de los Platanos;Villa Union;Pajacuaran;Epazoyucan;San Nicolas de los Garza;Penamiller;Ciudad del Carmen;Heroica Ciudad de Tlaxiaco;Heroica Matamoros;Autlan de Navarro;Tepetlaoxtoc;El Arenal;Tlapacoyan;Martinez de la Torre;Santa Maria Tonameca;Cadereyta;La Barca;Cordoba;Tenango de Doria;Madera;Otumba;Juan Rodriguez Clara;Tampico;Oaxaca;Huitzilan;Chetumal;Grajales;Benemerito;San Luis de la Paz;Penjamo;Sombrerete;Zapopan;Nuevo Ideal;Villa del Carbon;Tequisquiapan;Ciudad Valles;Temixco;Rio Grande;La Magdalena Tlaltelulco;Villa de Alvarez;Lerdo de Tejada;Villa Corzo;Tlaltenango de Sanchez Roman;Ciudad Tula;Saltillo;Catemaco;Chenalho;Ciudad Victoria;Santiago Tangamandapio;Pinal de Amoles;Nogales;Santa Clara del Cobre;Ecatepec;Acayucan;Paso de Ovejas;Juchitepec;Tila;Coacalco;Calkini;Santa Ana;Mapastepec;Palenque;Purisima de Bustos +EG;Az Zaqaziq;Kafr ash Shaykh;Al Fayyum;Suhaj;Qina;At Tur;Damanhur;Suez;Asyut;Al Minya;Matruh;Rafah;Aswan;Egypt;Al Kharjah;Ash Shaykh Zuwayd;Luxor;Port Said;Bani Suwayf;Ismailia;Al Mansurah;Damietta;Al Ghardaqah;Banha;Alexandria;Shibin al Kawm;Bi'r al `Abd;Al `Arish;Cairo;Giza;Tanta +BD;Rajshahi;Sylhet;Naogaon;Rangpur;Barishal;Dhaka;Khulna;Mymensingh;Chattogram;Gaibandha;Bangladesh +TH;Chaiyo;Phrai Bueng;Loei;Pa Mok;Phrae;Roi Et;Rattaphum;Phak Hai;Nong Khai;Kamphaeng Phet;Bang Sao Thong;Phuket;Phon Charoen;Sai Buri;Chiang Klang;Chum Phae;Mae Hong Son;Lop Buri;Prachuap Khiri Khan;Phra Phutthabat;Sattahip;Chachoengsao;Bang Phae;Pak Thong Chai;Phra Pradaeng;Chumphon;Thong Pha Phum;Bang Phlat;Mae Sot;Uthai Thani;Salaya;Bang Kruai;Chiang Rai;Bueng Kan;Thung Song;Ranong;Si Sa Ket;Singhanakhon;Akat Amnuai;Nong Bua Lamphu;Bua Yai;Sai Mai;Surat Thani;Yala;Tha Bo;Don Sak;Si Wilai;Det Udom;Rong Kwang;Sing Buri;Nakhon Si Thammarat;Samut Songkhram;Tha Chang;Doi Saket;Wang Sombun;Wang Nam Yen;Bueng Khong Long;Narathiwat;Ban Phai;Kanchanaburi;Maha Sarakham;Saraburi;Ban Na San;Chom Thong;Nakhon Phanom;Thoen;Lamphun;Pak Chong;Kalasin;Ban Bueng;Thung Sai;Sa Kaeo;Bang Bua Thong;Thap Khlo;Phayao;Songkhla;Hat Yai;Huai Yot;Chon Buri;Pak Phanang;Phangnga;Phimai;Warin Chamrap;Kanchanadit;Takua Pa;Phichit;Non Sung;Kaset Wisai;Tha Yang;Phitsanulok;Sakon Nakhon;Nakhon Ratchasima;Klaeng;Doi Lo;Ubon Ratchathani;Pa Sang;Buri Ram;Kantharalak;Tha Mai;Khlong Luang;Mae Ai;Photharam;Tak Bai;Sawankhalok;Rayong;Krathum Baen;Chaiyaphum;Phanat Nikhom;Nong Ki;San Kamphaeng;Suphan Buri;Fao Rai;Chai Prakan;Pran Buri;Tak;Aranyaprathet;Khon Kaen;Taphan Hin;Na Klang;Lam Luk Ka;Khao Yoi;Ang Thong;Ban Chang;Si Satchanalai;Bangkok;Bang Khla;Sadao;Nonthaburi;Phra Nakhon Si Ayutthaya;Wiang Haeng;Sai Ngam;Ko Samui;Bang Racham;Betong;Mukdahan;Dok Kham Tai;Phra Samut Chedi;Pua;Chiang Mai;Tha Luang;Udon Thani;Kosum Phisai;Lampang;Samut Prakan;Wichian Buri;Thap Than;Ban Laem;Trat;Chok Chai;Bang Ban;Nam Som;Wang Saphung;Sikhio;Kathu;Pong Nam Ron;Kantang;Si Racha;Kaeng Khro;Trang;Chanthaburi;Wiang Sa;Nong Khae;Mae Wang;Mae Rim;Pathum Thani;Kranuan;Song Phi Nong;Tha Muang;Nong Bua;Prakhon Chai;Suwannaphum;Satuek;Nakhon Nayok;Nan;Samut Sakhon;Ratchaburi;Nakhon Pathom;Pattani;Cha-am;Ban Pong;Lom Sak;Sukhothai;Nakhon Thai;Phetchabun;Mae Sai;Yasothon;Hua Hin;Khlung;Su-ngai Kolok;Sung Noen;Amnat Charoen;Kaeng Khoi;Pak Kret;Nakhon Sawan;Phetchaburi;Sam Phran;Ban Dung;Den Chai;Satun;Kuchinarai;Nong Wua So;Na Wa;Nang Rong;Phai Sali;Prachin Buri;Phon;Sam Ko;Kut Chap;Yang Talat;Chai Nat;Kamalasai;Phatthalung;Krabi;Surin;Lang Suan;Phibun Mangsahan;Nong Kung Si;Kui Buri;Takhli;Na Yung;Uttaradit;Thailand +RU;Magas;Pavlovskiy Posad;Kem;Baksan;Barnaul;Dzerzhinsk;Primorsko-Akhtarsk;Tuapse;Bezhetsk;Mozhaysk;Rybinsk;Gulkevichi;Shatura;Ardon;Akhtubinsk;Izhevsk;Kirovo-Chepetsk;Taldom;Zhukovo;Argun;Apatity;Magadan;Nytva;Zelenogradsk;Derbent;Gorno-Altaysk;Budennovsk;Kostroma;Saint Petersburg;Baltiysk;Novoselitskoye;Khimki;Peschanokopskoye;Maykop;Yeysk;Konstantinovsk;Krasnyy Sulin;Kargopol;Starodub;Kudymkar;Sharya;Tutayev;Stavropol;Kotelniki;Kokhma;Ivanovo;Petropavlovsk-Kamchatskiy;Elkhotovo;Slavyansk-na-Kubani;Zheleznogorsk;Kashin;Vyazma;Astrakhan;Gryazi;Kimovsk;Krasnyy Yar;Komsomol'sk-na-Amure;Uzlovaya;Novopavlovsk;Seltso;Boksitogorsk;Shakhunya;Tomsk;Kondopoga;Vyksa;Kineshma;Kirishi;Kimry;Zhukovskiy;Kochubeyevskoye;Pervomaysk;Sergach;Lyudinovo;Vyatskiye Polyany;Nalchik;Khasavyurt;Ulan-Ude;Shuya;Gvardeysk;Nizhniy Novgorod;Slobodskoy;Gatchina;Akhty;Sergiyev Posad;Zelenokumsk;Rybnoye;Chaplygin;Tyumen;Mirnyy;Bogorodsk;Krasnoyarsk;Kemerovo;Udomlya;Chudovo;Alagir;Zadonsk;Kirov;Novodvinsk;Yaransk;Tver;Privolzhsk;Lytkarino;Nikel;Vladimir;Tula;Ramenskoye;Dankov;Kozelsk;Kalyazin;Abakan;Rasskazovo;Galich;Kuvshinovo;Volokolamsk;Svetlyy;Ostrov;Dylym;Kameshkovo;Belaya Glina;Pochinok;Losino-Petrovskiy;Nar'yan-Mar;Kungur;Yur'yev-Pol'skiy;Uchkeken;Kstovo;Sasovo;Ostashkov;Gudermes;Uglich;Saratov;Valday;Sarapul;Solnechnogorsk;Labinsk;Pitkyaranta;Mozdok;Belorechensk;Pestovo;Kozmodemyansk;Ipatovo;Shchigry;Belaya Kholunitsa;Pionerskiy;Prokhladnyy;Uren;Lebedyan;Chekhov;Kandalaksha;Lyubertsy;Oboyan;Vyazniki;Rylsk;Borovsk;Anapa;Gavrilov-Yam;Unecha;Belgorod;Bologoye;Saransk;Urzhum;Syktyvkar;Kizlyar;Kursavka;Ikryanoye;Aleksandrovskoye;Krasnogorsk;Stupino;Terek;Cheboksary;Digora;Voronezh;Novosibirsk;Mytishchi;Smolensk;Salekhard;Lukoyanov;Volosovo;Kotelnich;Salsk;Danilov;Furmanov;Sobinka;Kislovodsk;Temryuk;Beslan;Azov;Zhukovka;Anadyr;Aksay;Polyarnyy;Blagoveshchensk;Podporozhye;Pskov;Kizilyurt;Nelidovo;Yaroslavl;Podolsk;Belomorsk;Orel;Noginsk;Korenovsk;Reutov;Volgorechensk;Rzhev;Teykovo;Yuzha;Semenov;Krasnogvardeyskoye;Suzdal;Tikhvin;Priozersk;Dubovskoye;Tambov;Kaliningrad;Korolev;Mozhga;Kovdor;Pokrovskoye;Aleksin;Elektrostal;Semikarakorsk;Lodeynoye Pole;Kursk;Bryansk;Khanty-Mansiysk;Volodarsk;Fokino;Yartsevo;Segezha;Domodedovo;Kingisepp;Kolomna;Fryazino;Millerovo;Stroitel;Serpukhov;Slantsy;Moscow;Yoshkar-Ola;Levashi;Naro-Fominsk;Suoyarvi;Nazran;Ulyanovsk;Tikhoretsk;Odintsovo;Tsimlyansk;Botlikh;Kasimov;Pyatigorsk;Torzhok;Luga;Ertil;Bronnitsy;Apsheronsk;Vyshniy Volochek;Pavlovsk;Krymsk;Chernyakhovsk;Nartkala;Makhachkala;Klintsy;Dubna;Babayurt;Yelets;Novoaleksandrovsk;Malaya Vishera;Yefremov;Rudnya;Velsk;Izobil'nyy;Novozybkov;Orenburg;Pochep;Yessentuki;Sudogda;Narimanov;Ust'-Dzheguta;Skopin;Yekaterinburg;Sortavala;Zvenigovo;Ryazhsk;Kamyzyak;Korablino;Kola;Velikiy Novgorod;Manturovo;Konakovo;Shchekino;Onega;Yasnogorsk;Kulebaki;Melenki;Rodniki;Novomoskovsk;Yegoryevsk;Chaltyr;Soltsy;Ryazan;Vologda;Dmitrov;Kamensk-Shakhtinskiy;Lgov;Timashevsk;Neftekumsk;Mezen;Kambarka;Belev;Lyskovo;Tyrnyauz;Samara;Kazan;Volkhov;Levokumskoye;Pavlovo;Kovrov;Balashikha;Omutninsk;Murmansk;Belaya Kalitva;Kirovsk;Vladikavkaz;Zheleznovodsk;Vsevolozhsk;Petrozavodsk;Zavyalovo;Shali;Zernograd;Divnoye;Cherkessk;Dolgoprudnyy;Khabarovsk;Mayskiy;Pudozh;Nevel;Zavolzhsk;Chita;Kashira;Gusev;Velikiye Luki;Krasnoznamensk;Zuyevka;Pereslavl'-Zalesskiy;Irkutsk;Tarusa;Gus'-Khrustal'nyy;Chernogolovka;Istra;Groznyy;Lipetsk;Safonovo;Gorodets;Dorogobuzh;Omsk;Murom;Medvezhyegorsk;Rossosh;Kirzhach;Yuzhno-Sakhalinsk;Borovichi;Arzgir;Vichuga;Mtsensk;Blagodarnyy;Kotlas;Urus-Martan;Kurchaloy;Mikhaylov;Adygeysk;Orekhovo-Zuyevo;Karachev;Suvorov;Gagarin;Shchelkovo;Surazh;Proletarsk;Klin;Kaluga;Guryevsk;Severodvinsk;Gorokhovets;Elista;Karachayevsk;Arzamas;Kireyevsk;Penza;Pushkino;Severomorsk;Sernovodsk;Svetlogorsk;Abinsk;Luza;Perevoz;Porkhov;Kasumkent;Neman;Votkinsk;Nevinnomyssk;Neya;Nyandoma;Sukhinichi;Volgograd;Lobnya;Malgobek;Kirs;Birobidzhan;Petushki;Buynaksk;Pechory;Achkhoy-Martan;Yelnya;Morozovsk;Rostov;Lermontov;Kolchugino;Karabulak;Obninsk;Staraya Russa;Kyzyl;Kurchatov;Sarov;Perm;Vidnoye;Navashino;Toropets;Nolinsk;Livny;Usman;Kurgan;Svetlograd;Georgiyevsk;Voskresensk;Krasnodar;Vladivostok;Volzhsk;Opochka;Aleksandrov;Roslavl;Vyborg;Venev;Ufa;Chelyabinsk;Donskoy;Russia;Buy;Mineral'nyye Vody;Chkalovsk;Plavsk;Tosno;Koryazhma;Glazov;Arkhangelsk;Sovetsk;Nerekhta;Kostomuksha;Bogoroditsk;Kharabali;Kondrovo;Yakutsk;Likhoslavl;Dzerzhinskiy;Ruza;Maloyaroslavets;Kurganinsk +AR;Colon;Famailla;Carhue;Las Flores;San Justo;Pigue;Ranchos;General Jose de San Martin;General Juan Madariaga;Esteban Echeverria;Santiago del Estero;Obera;Chamical;Azul;La Quiaca;Jesus Maria;Berisso;Rio Cuarto;Pilar;General Villegas;El Calafate;Villa Angela;Benito Juarez;San Ramon de la Nueva Oran;San Isidro;Zarate;Quimili;Chacabuco;Bernardo de Irigoyen;San Rafael;Apostoles;Abra Pampa;Bragado;Joaquin V. Gonzalez;Junin de los Andes;Chascomus;Las Heras;Rauch;Gualeguay;Necochea;Concepcion del Uruguay;General Acha;La Leonesa;Gobernador Virasora;San Bernardo;Aguilares;Merlo;Gastre;General Las Heras;Coronel Dorrego;Tres Arroyos;Baradero;Campo Largo;Villa Berthet;Alto Rio Senguer;Anatuya;Santa Rosa de Rio Primero;Mar del Plata;Lomas de Zamora;Esquel;Moron;Malargue;General Rodriguez;Empedrado;Capitan Sarmiento;Villa La Angostura;Esperanza;Tandil;Termas de Rio Hondo;Daireaux;Rosario del Tala;Carmen de Areco;Banda del Rio Sali;Monte Quemado;San Miguel de Tucuman;Reconquista;Diamante;Cerrillos;Formosa;Tostado;Lincoln;General Martin Miguel de Guemes;Nueve de Julio;Andalgala;Berazategui;General Pinedo;Goya;San Juan;Pehuajo;Canuelas;Maipu;Canada de Gomez;Ushuaia;La Cruz;San Luis;Buenos Aires;Jose Maria Ezeiza;General San Martin;Rosario;Resistencia;Monte Caseros;Sierra Colorada;Rodeo;Concepcion;Quitilipi;Balcarce;Las Brenas;Chepes;Puerto Madryn;Miramar;Las Rosas;Comodoro Rivadavia;General Alvear;Puan;Castelli;Nogoya;La Rioja;Federal;Ramallo;Libertador General San Martin;Villa Ojo de Agua;Jose C. Paz;Tornquist;Saladas;Viedma;Santa Fe;Rivadavia;General Conesa;San Julian;Bell Ville;Capilla del Senor;Laboulaye;General Belgrano;Ingeniero Guillermo N. Juarez;Victoria;Guernica;Perito Moreno;Arrecifes;Carmen de Patagones;Caucete;Gualeguaychu;Caseros;San Martin de los Andes;Machagai;Villa Maria;Lanus;Monteros;Belen de Escobar;Hurlingham;Palpala;Villa Paranacito;Campana;Villa del Rosario;Gobernador Gregores;Chimbas;Lujan de Cuyo;Zapala;Frias;Argentina;Lobos;Pergamino;General Viamonte;Posadas;Catamarca;Concordia;Villa Mercedes;San Jose de Jachal;San Roque;Parana;Henderson;Adolfo Gonzales Chaves;Chilecito;Carlos Casares;Santa Rosa;Godoy Cruz;Puerto Tirol;Charata;Rio Gallegos;Villa Constitucion;Federacion;Clorinda;San Vicente;Casilda;Tinogasta;Victorica;Ensenada;Alta Gracia;Rosario de Lerma;Rafaela;San Antonio de Areco;Neuquen;La Banda;Dean Funes;Belen;Cosquin;Villaguay;San Andres de Giles;Presidencia Roque Saenz Pena;Avellaneda;Oliva;Choele Choel;Sauce;San Jose de Feliciano;General Pico;Puerto Rico;San Cristobal;Puerto Deseado;La Plata;Villa Krause;Chumbicha;Mburucuya;Villa Union;San Fernando;Chivilcoy;La Carlota;Marcos Juarez;Marcos Paz;Susques;La Paz;Bahia Blanca;Olavarria;Mercedes;San Francisco;Cruz del Eje;Leandro N. Alem;Vera;Los Polvorines;Corzuela;Pinamar;Tafi Viejo;Veinticinco de Mayo;Ituzaingo;Villa Aberastain;Mendoza;Ayacucho;Cafayate;Rosario de la Frontera;Santa Sylvina;General Roca;San Martin;San Isidro de Lules;Cordoba;Sarmiento;Magdalena;Telsen;Tartagal;Villa Gesell;Coronda;Loberia;San Nicolas de los Arroyos;Lujan;Junin;Comandante Fontana;Tres Isletas;Corrientes;Presidencia de la Plaza;San Salvador;Yerba Buena;San Salvador de Jujuy;Dolores;San Antonio Oeste;San Luis del Palmar;Navarro;Humahuaca;Santa Lucia;Eldorado;San Javier;Pirane;Pampa del Infierno;Salta;Bella Vista;Rinconada;Paso de los Libres;Rio Grande;San Carlos de Bariloche;San Pedro;Tunuyan;Trenque Lauquen;Curuzu Cuatia;Roque Perez;Las Lajas;Florencio Varela;Quilmes;Rawson;Brandsen;Punta Alta;Santa Maria;Coronel Suarez;Chos Malal;Santo Tome;San Antonio de los Cobres;Trancas +NG;Kaduna;Somolu;Auchi;Umuahia;Ede;Abeokuta;Aba;Dutse;Yenagoa;Ilobu;Owo;Damboa;Igboho;Asaba;Chibok;Baure;Zango;Nguru;Ipokia;Nsukka;Biu;Ikot Ekpene;Ibadan;Offa;Kaura Namoda;Orlu;Wukari;Minna;Lokoja;Otukpo;Agege;Jalingo;Lafia;Malumfashi;Isanlu;Ikom;Birnin Kebbi;Epe;Hong;Ugep;Abakaliki;Sokoto;Bama;Mubi;Calabar;Potiskum;Kontagora;Ondo;Kukawa;Sapele;Akure;Apapa;Ile-Ife;Numan;Enugu;Nnewi;Azare;Ikot Abasi;Okene;Iwo;Awka;Makurdi;Zaria;Kano;Funtua;Owerri;Shagamu;Nigeria;Kisi;Gashua;Benin City;Gusau;Osogbo;Bauchi;Gwoza;Ogbomoso;Katsina;Idah;Ilesa;Iseyin;Kuje;Warri;Abuja;Ipetumodu;Osisioma;Maiduguri;Jos;Keffi;Gombe;Lagos;Uyo;Ado-Ekiti;Mushin;Abaji;Burutu;Yola;Mai'Adua;Oyo;Ikeja;Port Harcourt;Saki;Kumo;Ikare;Ilorin;Gwagwalada;Damaturu;Onitsha;Ijebu-Ode;Bida +TR;Alasehir;Harran;Andirin;Selcuk;Kocaeli;Golcuk;Iznik;Duzici;Kepsut;Salihli;Kavaklidere;Gebze;Yatagan;Askale;Turgutlu;Sulakyurt;Alanya;Karapinar;Civril;Bolvadin;Karamursel;Koprukoy;Didim;Sivas;Battalgazi;Karkamis;Sungurlu;Sinanpasa;Tosya;Eruh;Ardesen;Gemerek;Cicekdagi;Soke;Selcuklu;Konak;Sorgun;Ayvalik;Arhavi;Konya;Koycegiz;Karakocan;Sariveliler;Yigilca;Pamukova;Bodrum;Devrek;Turkeli;Ciftlik;Kars;Ankara;Erfelek;Kelkit;Yesilova;Guneysu;Hassa;Hazro;Ozalp;Malkara;Sarkisla;Pendik;Arifiye;Caycuma;Egirdir;Catalpinar;Ceyhan;Kapakli;Catak;Kavak;Gorele;Pazar;Golbasi;Gumushane;Yenimahalle;Of;Beydag;Muradiye;Cesme;Siirt;Cerkezkoy;Unye;Nazilli;Taskopru;Tonya;Erbaa;Kilis;Bitlis;Ceylanpinar;Solhan;Armutlu;Nilufer;Bafra;Kovancilar;Yomra;Aralik;Cayeli;Avanos;Terme;Niksar;Adana;Gomec;Mudanya;Menderes;Simav;Golmarmara;Mazidagi;Taslicay;Istanbul;Kirkagac;Akdagmadeni;Erzin;Buldan;Akhisar;Bilecik;Cobanlar;Baykan;Oltu;Gole;Saraykent;Kumlu;Havsa;Mardin;Aksaray;Kadinhani;Cat;Akkus;Mezitli;Toprakkale;Dursunbey;Osmangazi;Antalya;Ulubey;Huyuk;Gevas;Cankiri;Ayancik;Sebin Karahisar;Susuz;Kayseri;Yakacik;Sultandagi;Gumusova;Safranbolu;Ferizli;Meric;Tunceli;Altinova;Buyukcekmece;Cungus;Dargecit;Kucukkuyu;Espiye;Cubuk;Ortakoy;Vize;Sarkikaraagac;Acigol;Hayrabolu;Golyaka;Sabanozu;Kecioren;Kesan;Uzumlu;Ula;Harbiye;Sultanhani;Suruc;Elmadag;Bayramic;Oguzeli;Ilic;Alacati;Diyadin;Gerze;Yaprakli;Kozan;Polatli;Digor;Bahcesaray;Urla;Mut;Kastamonu;Resadiye;Akyurt;Kizilcahamam;Derinkuyu;Siran;Cal;Saruhanli;Sarioglan;Malatya;Menemen;Gumushacikoy;Kiraz;Isparta;Ahmetli;Besni;Haskoy;Cildir;Can;Cumra;Dortyol;Datca;Golpazari;Soma;Sarayonu;Hinis;Divrigi;Varto;Arapgir;Adakli;Osmancik;Acipayam;Adalar;Kulu;Sivrihisar;Muratli;Dereli;Turkey;Sogut;Carsamba;Tekkekoy;Akcaabat;Yildizeli;Suhut;Bozdogan;Arsuz;Cifteler;Kahramanmaras;Van;Manavgat;Demre;Marmaris;Gordes;Cekerek;Kangal;Denizli;Hidirbey;Samsun;Yakakent;Bergama;Midyat;Korkut;Sariyer;Gonen;Sutculer;Nizip;Tasucu;Karatas;Yumurtalik;Asarcik;Serinhisar;Duzkoy;Camardi;Incirliova;Alapli;Tatvan;Kaynarca;Kirsehir;Osmaneli;Eregli;Cavdir;Karaburun;Tekirdag;Goksun;Yenipazar;Afsin;Sincan;Hani;Manisa;Hatay;Pinarbasi;Dicle;Pazarcik;Beypazari;Alpu;Kocasinan;Umurlu;Sason;Kagizman;Siverek;Sanliurfa;Arakli;Marmara Ereglisi;Kinik;Ortaklar;Eskisehir;Ahlat;Honaz;Yahyali;Tire;Amasra;Sariz;Dogubayazit;Camoluk;Susehri;Ulukisla;Elbistan;Ciftlikkoy;Sarkoy;Mamak;Nigde;Balikesir;Ezine;Karayazi;Guneysinir;Tasova;Bayburt;Gulsehir;Cayiralan;Corlu;Erdemli;Banaz;Kale;Incesu;Talas;Tortum;Eynesil;Dosemealti;Golkoy;Karaman;Bozkurt;Kutahya;Senkaya;Urgup;Belen;Canakkale;Tavas;Kosk;Mersin;Domanic;Kilimli;Kozluk;Genc;Tirebolu;Foca;Lice;Yavuzeli;Bingol;Ermenek;Mecitozu;Zonguldak;Korkuteli;Caldiran;Semdinli;Corum;Silivri;Kulp;Yalova;Serik;Amasya;Gaziantep;Refahiye;Ihsaniye;Uludere;Ayvacik;Gerede;Ivrindi;Kocakoy;Kozlu;Sultanhisar;Sile;Hisarcik;Kursunlu;Sinop;Birecik;Borcka;Sogutlu;Yuksekova;Ispir;Korfez;Cinar;Balya;Araban;Luleburgaz;Meram;Akseki;Gurgentepe;Savsat;Gemlik;Samandag;Aydin;Fatsa;Lapseki;Erdek;Tufanbeyli;Gokcebey;Yozgat;Emirgazi;Bismil;Doganhisar;Zile;Celtik;Pozanti;Ortaca;Adiyaman;Aliaga;Akyazi;Aktepe;Kayapinar;Agri;Dogansehir;Burhaniye;Sakarya;Rize;Salpazari;Caglayancerit;Tokat;Akcakoca;Erzurum;Gaziemir;Palu;Diyarbakir;Reyhanli;Bulancak;Sarikamis;Karpuzlu;Ondokuzmayis;Gazipasa;Bucak;Gallipoli;Anamur;Surmene;Keskin;Ipsala;Pervari;Imamoglu;Selim;Yahsihan;Kemalpasa;Cizre;Hakkari;Hacilar;Trabzon;Camas;Dikili;Halfeti;Bartin;Yaglidere;Cinarcik;Hendek;Narman;Seyitgazi;Sur;Yenisehir;Alaca;Bursa;Kadirli;Turkoglu;Buyukorhan;Vezirkopru;Bayat;Kucukcekmece;Inegol;Cayirova;Aybasti;Hizan;Karabuk;Egil;Kurtalan;Taskent;Mugla;Bor;Artvin;Kumru;Igdir;Kabatas;Mus;Yerkoy;Viransehir;Nurdagi;Sarikaya;Tuzluca;Senirkent;Yildirim;Dalaman;Tut;Zara;Savur;Eleskirt;Karaisali;Gulagac;Aglasun;Iskenderun;Cihanbeyli;Bornova;Sivrice;Guclukonak;Tomarza;Nevsehir;Turhal;Yuregir;Suluova;Patnos;Catalca;Karasu;Fethiye;Batman;Bahce;Cide;Aydincik;Derik;Sereflikochisar;Usak;Edirne;Arpacay;Cerkes;Vakfikebir;Silifke;Torbali;Gorukle;Milas;Cumayeri;Omerli;Susurluk;Gurun;Giresun;Findikli;Piraziz;Sirnak;Kusadasi;Saimbeyli;Alacam;Tekman;Eceabat;Saricam;Ordu;Kocarli;Kirikkale;Babaeski;Elmali;Manyas;Bolu;Yesilhisar;Nurhak;Aslanapa;Sindirgi;Kas;Sarigol;Bayindir;Cankaya;Cermik;Horasan;Baglar;Akcakale;Arsin;Serinyol;Mucur;Bozkir;Bandirma;Baskil;Uzundere;Kirikhan;Germencik;Sirvan;Bala;Goynucek;Selendi;Aladag;Afyonkarahisar;Develi;Gursu;Cay;Cine;Beysehir;Biga;Koyulhisar;Karacabey;Demirozu;Edremit;Tarsus;Emet;Akyaka;Silvan;Altinozu;Kalecik;Besiri;Cayirli;Carsibasi;Emirdag;Osmaniye;Yunak;Altindag;Davutlar;Kose;Hilvan;Nusaybin;Yalvac;Arac;Bogazliyan;Feke;Sapanca;Bozuyuk;Geyve;Caykara;Yayladagi;Cameli;Orhaneli;Aksehir;Mesudiye;Orhangazi;Buharkent;Caybasi;Silopi;Idil;Pasinler;Havran;Kula;Karahalli;Alucra;Karakopru;Bulanik;Mutki;Seyhan;Islahiye;Baskale;Sefaatli;Altinyayla;Altintas;Altunhisar;Aksu;Kesap;Karliova;Saray;Ayas;Gediz;Yenice;Golhisar;Torul;Sivasli;Saraykoy;Duzce;Odemis;Kahta;Adilcevaz;Nallihan;Esme;Bigadic;Yazihan;Basmakci;Ulus;Beykoz;Yusufeli;Savastepe;Izmir;Ilgin;Karsiyaka;Aricak;Seferhisar;Seydisehir;Kocaali;Ekinozu;Mustafakemalpasa;Gurpinar;Almus;Pazaryeri;Uzunkopru;Gokceada;Besikduzu;Ergani;Burdur;Guney;Karapurcek;Yesilli;Ercis;Derince;Havza;Karacasu;Kumluca;Demirci;Altinekin;Etimesgut;Inebolu;Tefenni;Haymana;Koprubasi;Bozyazi;Kartal;Yesilyurt;Evren;Melikgazi;Merzifon;Boyabat;Buca;Persembe;Kandira;Iskilip;Sumbas;Karacoban;Ikizce;Hadim;Kiziltepe;Guroymak;Korgan;Darende;Bunyan;Bozova;Elazig;Kirklareli;Macka;Puturge;Hopa;Kuyucak;Akcadag;Kaman;Keles;Ardahan;Duragan;Finike;Tavsanli;Cilimli;Malazgirt;Erzincan;Cukurca;Kaynasli;Camliyayla;Gulnar +PK;Tando Muhammad Khan;Jacobabad;Chilas;Peshawar;Turbat;Jamshoro;Gujrat;Gujranwala;Sahiwal;Khanewal;New Mirpur;Pishin;Umarkot;Jhelum;Ziarat;Timargara;Dera Allahyar;Jhang City;Uthal;Kohat;Saidu Sharif;Kalat;Vihari;Thatta;Badin;Gakuch;Quetta;Muzaffargarh;Sialkot City;Bhakkar;Khushab;Shekhupura;Bannu;Lahore;Swabi;Hyderabad City;Mardan;Kotli;Khairpur Mir's;Karak;Lodhran;Panjgur;Gwadar;Gilgit;Dera Ghazi Khan;Haripur;Bahawalnagar;Okara;Faisalabad;Bahawalpur;Batgram;Hangu;Chiniot;Abbottabad;Karachi;Ghotki;Sanghar;Mirpur Khas;Sargodha;Bagh;Naushahro Firoz;Pakpattan;Kharan;Tando Allahyar;Pakistan;Dadu;Mandi Bahauddin;Sukkur;Nowshera;Tank;Nawabshah;Chitral;Hafizabad;Zhob;Rawalpindi;Islamabad;Charsadda;Larkana;Toba Tek Singh;Kasur;Mianwali;Multan;Chakwal;Mansehra;Dera Ismail Khan +VN;Thanh Phu;Son La;Hung Yen;Tay Ninh;Thai Binh;Con Dao;Buon Ma Thuot;Cam Ranh;An Chau;Nha Trang;Phu Tho;My Tho;Dong Ha;A Yun Pa;Ha Giang;Ho Chi Minh City;Hoa Binh;Bac Kan;Ky Anh;Da Nang;Kien Giang;Nga Bay;Haiphong;Soc Trang;Tam Ky;Kien Luong;Dong Hoi;Huong Tra;Tuy Hoa;Duc Pho;Cho Moi;Pleiku;Cao Bang;Di Linh;Bien Hoa;Phan Rang-Thap Cham;Long My;Thu Duc;Vung Tau;Thu Dau Mot;An Phu;Yen Bai;Thanh Hoa;Cho Lach;Phu Quoc;Phan Thiet;Trang Bang;Phuc Yen;Lang Son;Vi Thanh;Di An;Long Xuyen;Hanoi;Rach Gia;An Nhon;Tan Chau;Sa Pa;Quang Ngai;Hai Duong;Gia Nghia;Phu Ly;Kon Tum;Dong Xoai;Duyen Hai;Ba Don;Long Thanh;Viet Tri;Ha Long;Ca Mau;Ha Tinh;Bac Giang;Buon Trap;Mong Cai;Thai Nguyen;Vinh;Ninh Hoa;Bo;Tan Phu;Nam Dinh;Long Khanh;Kinh Mon;Quy Nhon;Binh Minh;Son Tay;Bac Ninh;Thi Tran Ngai Giao;Vinh Yen;Vietnam;Huong Canh;Hue;Tam Diep;Dien Bien Phu;Cam Pha;Vinh Chau;Ben Tre;Chon Thanh;Ha Tien;Bao Loc;Lai Chau;Lao Cai;Song Cau;Can Tho;Tan Uyen;Khe Sanh;Tan An;Tuyen Quang;Cao Lanh;Quang Yen;Tra Vinh;Bac Lieu;Vinh Long;Ninh Binh;Hoa Thanh;Sa Dec;Hang Tram;Da Lat +IR;Bastak;Ahram;Qarah Zia' od Din;Lordegan;Quchan;Kavar;Rasak;Kalardasht;Bandar `Abbas;Shirvan;Bandar-e Gaz;Asadabad;Sabzevar;Ramsar;Komijan;Showt;Fannuj;Ravar;Semirom;`Aliabad-e Katul;Qazvin;Qarchak;Shahin Dezh;Zarrin Shahr;Zaboli;Boshruyeh;Heris;Torbat-e Heydariyeh;Ahvaz;Dalgan;Bojnurd;Ardal;Dehloran;Kermanshah;Aligudarz;Neqab;Sonqor;Neyriz;Bahar;Dehaqan;Kaleybar;Bam;Ashtian;Zehak;Arsanjan;Khash;Osku;Piranshahr;Garmsar;Meshgin Shahr;Namin;Robat Karim;Sari;Estahban;Pakdasht;Azarshahr;Basht;Khorramshahr;Mehran;Shahr-e Kord;Abhar;Razan;Poldasht;`Anbarabad;Gerash;Charam;Rabor;Rafsanjan;Zahedan;Kahnuj;Nazarabad;Sa`adat Shahr;Sarableh;Borujerd;Bajestan;Bafq;Landeh;Ilam;Sarakhs;Miandasht;Fereydunshahr;Hamadan;Ramshir;Zanjan;Nowshahr;Varamin;Suran;Mirjaveh;Babolsar;Qorveh;Khomeyn;Tabas;Tabriz;Semnan;Saqqez;Azadshahr;Anar;Qa'en;Birjand;Shahin Shahr;Khalkhal;Hendijan;Shahr-e Herat;Borujen;Tiran;Qom;Paveh;Aleshtar;Jam;Torqabeh;Galugah;Kherameh;Bandar-e Bushehr;Sarbisheh;Ashkhaneh;Saravan;Joghtay;Khorramdarreh;Seyah Cheshmeh;Do Gonbadan;Ahar;Dowlatabad;Eslamshahr;Taft;Yazd;Mashhad;Dehgolan;Farashband;Sarpol-e Zahab;Bukan;Marvdasht;Baneh;Sorkheh;Damavand;Nehbandan;Torbat-e Jam;Darreh Shahr;Sepidan;Ferdows;Kalaleh;Bandar-e Torkaman;Kuhbanan;Bandar-e `Asaluyeh;Kuhdasht;Kangan;Malekan;Farsan;Sarayan;Kashmar;Nikshahr;Ardestan;Jiroft;Shiraz;Mobarakeh;Galikesh;Astaneh-ye Ashrafiyeh;Fariman;Harsin;Germi;Najafabad;Eqlid;Hashtrud;Maragheh;Omidiyeh;Sowme`eh Sara;Marand;Gotvand;Golpayegan;Andimeshk;Ardabil;Shushtar;Taybad;Ravansar;Delijan;Dargaz;Hoveyzeh;Oshnaviyeh;Bardaskan;Fasa;Maraveh Tappeh;Langarud;Aran Bidgol;Kabudarahang;Bileh Savar;Amlash;Susangerd;Feyzabad;Gomishan;Alvand;Khorramabad;Javanrud;Hamidiyeh;Amol;Ben;Mahallat;Damghan;Khvansar;Takab;Fardis;Azna;Esfarayen;Tuyserkan;Jolfa;Karaj;Meybod;Kerman;Shabestar;Siahkal;Bandar-e Khamir;Shahreza;Shush;Fuman;Bijar;Shahr-e Qods;Arak;Sanandaj;Likak;Esfahan;Hashtpar;Ramhormoz;Ashkezar;Dorud;Zarand;`Ajab Shir;Zabol;Takestan;Kut-e `Abdollah;Jask;Masjed Soleyman;Khowrmuj;Eshtehard;Abarkuh;Yasuj;Qasr-e Shirin;Babol;Shahr-e Babak;Neyshabur;Gonabad;Sahneh;Manujan;Khomeyni Shahr;Sardasht;Dezful;Tonekabon;Salmas;Sarvestan;Rasht;Khowy;Rudsar;Malard;Naqadeh;Gilan-e Gharb;Safashahr;Hashtgerd;Kazerun;Orumiyeh;Eyvan;Natanz;Kashan;Mahdishahr;Khonj;Gonbad-e Kavus;Iran;Divandarreh;Shahriar;Bardsir;Konarak;Qeshm;Eslamabad-e Gharb;Arakvaz-e Malekshahi;Bandar-e Mahshahr;Poldokhtar;Qasr-e Qand;Surak;Bandar-e Deylam;Rezvanshahr;Lamerd;Khvaf;Garmeh;Jahrom;Malayer;Tafresh;Jajarm;Mianeh;Bandar-e Lengeh;Shahrud;Dehdasht;Qal`eh Ganj;Sarab;Nahavand;Bandar-e Genaveh;Minudasht;Haftkel;Baft;Mohammadabad;Shazand;Bonab;Fereydun Kenar;Deyr;Behshahr;Tehran;Marivan;Bandar-e Anzali;Behbahan;Sirjan;Famenin;Ardakan;Saveh;Chabahar;Gorgan;Neka;Borazjan;Shadegan;Abyek;Iranshahr;Bagh-e Malek;Falavarjan +CD;Gemena;Inongo;Mbandaka;Kindu;Goma;Isiro;Mbuji-Mayi;Lusambo;Kabinda;Gbadolite;Matadi;Kananga;Bandundu;Lisala;Buta;Kenge;Kisangani;Bunia;Kamina;Kolwezi;Kalemie;Congo (Kinshasa);Tshikapa;Boende;Lubumbashi;Bukavu;Kinshasa +GB;London +FR;Apt;Evreux;Nanterre;Lorient;Fougeres;Vierzon;Arcachon;Chambery;Mantes-la-Jolie;Briancon;Evry;Cholet;Chateau-Gontier;Luneville;Soissons;Parthenay;Avignon;Sarlat-la-Caneda;Saint-Brieuc;Clermont-Ferrand;Paris;Bordeaux;Besancon;Toulon;Arles;Thionville;Grenoble;Albi;Tours;Issoudun;Colmar;Lens;Torcy;Avranches;Ales;Toul;Draguignan;Sarcelles;Boulogne-Billancourt;Nogent-sur-Marne;Bourg-en-Bresse;Pau;Riom;Abbeville;Marseille;Rennes;Bergerac;Cherbourg;Vannes;Tarbes;Montargis;Grasse;Beauvais;Vendome;Cognac;Vienne;Senlis;Metz;Tulle;Bonneville;Foix;Epernay;Meaux;Amiens;Cergy;Caen;Laon;Perpignan;Vesoul;Cahors;Angouleme;Castres;Chalons-en-Champagne;Palaiseau;Aurillac;Dax;Antony;Orleans;Compiegne;Douai;Montauban;Carpentras;Figeac;Limoges;Mulhouse;Bourges;Digne-les-Bains;Strasbourg;Lille;Dijon;Epinal;Rambouillet;Le Mans;Chateauroux;Rodez;Marmande;Blois;Pontarlier;Troyes;La Fleche;Fontainebleau;Bernay;Creteil;Chartres;Saint-Julien-en-Genevois;Bressuire;Alencon;Issoire;Nantes;Nancy;Belfort;Castelsarrasin;Provins;Le Havre;La Roche-sur-Yon;Gap;Cosne sur Loire;Saint-Denis;Boulogne-sur-Mer;Autun;Toulouse;Mayenne;Auxerre;Mont-de-Marsan;Sedan;Saint-Omer;Nevers;Bethune;Perigueux;Les Sables-d'Olonne;Angers;Roanne;Chateaudun;Saint-Quentin;Bayeux;Mende;Laval;Lannion;Montbrison;Saint-Dizier;Verdun;Pamiers;Chatellerault;Oloron-Sainte-Marie;Saint-Gaudens;Vitry-le-Francois;Clermont;Etampes;France;Chalon-sur-Saone;Dole;Villefranche-sur-Saone;Dinan;Chateaubriant;Valence;Limoux;Saint-Lo;Melun;Valenciennes;Bayonne;Thonon-les-Bains;Millau;Chaumont;Chateau-Thierry;Poitiers;Thiers;Le Raincy;Lyon;Sarreguemines;Carcassonne;Saint-Etienne;Aix-en-Provence;Bobigny;Saint-Nazaire;Rochefort;Libourne;Auch;Lons-le-Saunier;Pontivy;Villefranche-de-Rouergue;Istres;Cambrai;Sarrebourg;Le Puy-en-Velay;Molsheim;Dieppe;Sens;Villeneuve-sur-Lot;Vire;Pithiviers;Beziers;Brive-la-Gaillarde;Saint-Die-des-Vosges;Dunkerque;Charleville-Mezieres;Forbach;Albertville;Nice;Ussel;Nogent-le-Rotrou;Brignoles;Saverne;Moulins;Nimes;Annecy;L'Hay-les-Roses;La Rochelle;Bar-le-Duc;Gex;Saumur;Saintes;Argenteuil;Belley;Calais;Montbeliard;Saint-Claude;Lisieux;Redon;Narbonne;Beaune;Fontenay-le-Comte;Reims;Muret;Haguenau;Morlaix;Montlucon;Selestat;Gueret;Brest;Niort;Privas;Versailles;Tournon-sur-Rhone;Rouen;Saint-Germain-en-Laye;Arras;Quimper;Saint-Amand-Montrond;Macon;Agen;Vichy;Dreux;Ajaccio;Argentan;Montpellier;Saint-Malo +PE;Cusco;Chachapoyas;Puno;Trujillo;Abancay;Ayacucho;Pucallpa;Chiclayo;Pampas;Cerro de Pasco;Iquitos;Ica;Huacho;Arequipa;Piura;Huancavelica;Peru;Callao;Tacna;Puerto Maldonado;Huancayo;Huanuco;Tumbes;Moyobamba;Moquegua;Lima;Cajamarca;Contamana;Huaraz +AO;Sumbe;Ndalatando;Malanje;Benguela;Ondjiva;Luanda;Angola;Caxito;Huambo;Uige;Cabinda;Dundo;Luena;Mocamedes;Mbanza Kongo;Lubango;Saurimo;Menongue;Cuito +MY;Labuan;Putrajaya;Kota Bharu;Kuantan;George Town;Kuala Terengganu;Malaysia;Kota Kinabalu;Kangar;Alor Setar;Shah Alam;Johor Bahru;Kuching;Kuala Lumpur;Ipoh;Melaka;Seremban;Tumpat +ZA;Polokwane;Bhisho;Cape Town;Nelspruit;Pretoria;Mahikeng;Pietermaritzburg;Bloemfontein;South Africa;Johannesburg;Kimberley +CO;Yarumal;Cucuta;Neiva;Cajica;Arboledas;Puerto Berrio;Ipiales;Padilla;Pradera;Ospina;Cienaga;Cicuco;Darien;Chia;El Carmen de Bolivar;Mitu;Muzo;Cachira;Piedecuesta;San Jose del Fragua;Tauramena;Morroa;Jerico;Puerto Boyaca;Candelaria;Itagui;San Carlos de Guaroa;El Dovio;San Jose del Guaviare;Popayan;Mistrato;Vistahermosa;Garagoa;Cartago;La Apartada;Manaure;Zarzal;Marquetalia;Combita;Medellin;Tumaco;San Bernardo del Viento;San Zenon;San Jeronimo;Ituango;Ansermanuevo;Chima;Socota;Cartagena;Totoro;Curiti;Sopetran;Moniquira;Tarqui;Abejorral;Genova;Bosconia;Samaniego;Puerto Libertador;Agustin Codazzi;Istmina;Nemocon;Santa Rosa de Cabal;Balboa;Guatica;Tame;Curumani;Fundacion;Sonson;Santo Domingo;Monteria;La Jagua de Ibirico;El Paujil;Angostura;El Carmen;Pacora;Los Cordobas;Aracataca;Buesaco;Susa;Tocaima;Puerto Gaitan;El Retorno;Puerto Lopez;La Argentina;Espinal;Guarne;Toledo;Pereira;Pelaya;Mosquera;Arjona;San Jacinto;San Vicente;Apia;Carmen de Apicala;Guamal;Tocancipa;Mocoa;San Estanislao;El Banco;Maicao;Villagarzon;Sabanalarga;Salgar;Iscuande;Pasca;Leticia;San Juan de Uraba;Cajamarca;Cotorra;Pueblo Bello;Jambalo;Monitos;El Copey;Inirida;Carmen de Viboral;Tuchin;Nobsa;Anori;Pacho;El Cairo;Chinu;Sibate;Juan de Acosta;El Tarra;Guaitarilla;Jurado;Cimitarra;Valdivia;Ponedera;Villahermosa;Pie de Pato;Guadalajara de Buga;San Antonio del Tequendama;Unguia;Sesquile;La Palma;Rionegro;Obando;Morales;Palestina;El Reten;Piamonte;San Juan de Arama;La Cruz;Puerto Concordia;La Plata;Fortul;San Lorenzo;Caparrapi;Acandi;Norosi;Chivolo;Cerete;Ovejas;Montecristo;Momil;Soacha;Caldono;Aipe;Samaca;Rovira;Caicedonia;Tierralta;Arauquita;Bolivar;Palmito;Maria la Baja;Pupiales;Aquitania;Viterbo;Dagua;Guateque;Malaga;San Bernardo;Sabanagrande;La Tebaida;Tado;Pasto;Andalucia;Toribio;Puerto Santander;Zambrano;Entrerrios;Condoto;Sincelejo;Floridablanca;Potosi;Zapatoca;Sampues;Yotoco;San Marcos;Tena;Manzanares;Villapinzon;Cunday;Apartado;Melgar;Roldanillo;Puerto Wilches;Fusagasuga;Guaduas;Tubara;Belen de Umbria;El Carmen de Chucuri;Tuta;Coyaima;Plato;Distraccion;Guayabal;San Sebastian de Buenavista;Tenjo;Santander de Quilichao;Coconuco;Gamarra;Villa de San Diego de Ubate;San Carlos;Baranoa;Saldana;Purisima de la Concepcion;Saravena;Suaita;Zipaquira;Choconta;Chigorodo;Iles;Sogamoso;Armenia;San Benito Abad;Natagaima;Viota;San Martin de Loba;Gachancipa;Pueblo Rico;Santa Genoveva de Docordo;Restrepo;Suarez;Pijino del Carmen;Sucre;Mani;Piendamo;Facatativa;Santuario;Silvia;El Playon;Santo Tomas;Los Patios;Tiquisio;Los Santos;Monterrey;La Montanita;Chachagui;Caqueza;Algarrobo;Caldas;Neira;San Gil;Puerto Lleras;Pizarro;Trinidad;Betania;Barbacoas;Honda;El Carmen de Atrato;La Estrella;Chinacota;Puerto Colombia;El Doncello;Turbana;Suesca;San Pablo;Colombia;Quinchia;Solano;Toca;Barrancabermeja;Paipa;Betulia;La Macarena;Tibu;San Fernando;Suaza;El Cerrito;Nuqui;San Luis de Since;Urumita;Sevilla;San Juan del Cesar;Majagual;Achi;Caucasia;Choachi;La Ceja;San Martin;San Juan Nepomuceno;Donmatias;Purificacion;Gigante;Simijaca;Cisneros;La Virginia;Aguazul;Tesalia;Narino;El Tambo;Supia;Mogotes;Puerto Salgar;Riofrio;Galeras;Santa Marta;Clemencia;Sabana de Torres;Guasca;Agua de Dios;Oiba;Teorama;Cumbal;Castilla La Nueva;Necocli;Florida;Villeta;Caloto;Rio de Oro;Velez;La Gloria;San Jacinto del Cauca;Bello;Bucaramanga;Palmar de Varela;Aguadas;La Mesa;El Tablon;Chitaga;Bocas de Satinga;Puerto Narino;Villa Rica;Covenas;Barbosa;San Rafael;Fresno;Simiti;Mahates;Argelia;Puerto Carreno;Orocue;Villa de Leyva;Santa Rosa de Viterbo;Segovia;Yacuanquer;Cienaga de Oro;San Onofre;Amalfi;Frontino;El Pinon;La Esperanza;El Bordo;Taminango;San Pedro de Uraba;Puerto Nare;Hatillo de Loba;Zaragoza;Ginebra;El Aguila;La Victoria;Santa Catalina;Granada;Montelibano;San Alberto;Fuente de Oro;Nunchia;Fonseca;Ubala;San Antonio;Filadelfia;Palocabildo;Bogota;Puerto Escondido;Guacheta;Carepa;Funza;Valencia;Sardinata;Flandes;Sotomayor;Consaca;Calamar;Tibana;Lerida;Tausa;Ibague;Arauca;Bojaca;Guadalupe;Mutis;Marsella;Turbo;Socorro;Timbiqui;Rioblanco;Sibundoy;Ricaurte;Carlosama;Yumbo;La Calera;Isnos;Dosquebradas;San Sebastian;Timbio;Yondo;Tamesis;Iquira;Malambo;Acacias;Lejanias;Puerto Triunfo;Trujillo;Pamplona;Miranda;Planadas;San Pelayo;Acevedo;Aguachica;San Juan de Betulia;Guatape;Becerril;Santiago de Tolu;Retiro;Tibasosa;Cartagena del Chaira;Turbaco;Algeciras;Rosas;Giron;Usiacuri;El Zulia;San Roque;Yopal;Carmen de Carupa;Villanueva;Arbelaez;San Sebastian de Mariquita;El Colegio;Guaranda;Magangue;Puerto Tejada;Aranzazu;La Dorada;Amaga;Mercaderes;Campamento;Envigado;El Roble;Puerto Rico;Villavicencio;Margarita;Copacabana;Circasia;Puente Nacional;Sandona;Yali;Puerto Asis;Tabio;Linares;Astrea;Santa Barbara de Pinto;Santa Barbara;Raquira;Junin;Vijes;El Paso;Puerto Leguizamo;Convencion;Timana;El Rosal;Santa Lucia;Chiquinquira;Abrego;Altos del Rosario;Talaigua Nuevo;La Union;Canasgordas;Palermo;Valledupar;Guacari;Sopo;Caceres;San Pedro;Alcala;Siachoque;Pivijay;Charala;Arboletes;Nueva Granada;Chita;Tunja;Sahagun;Uribia;El Dificil;La Sierra;Coello;Riosucio;Hatonuevo;Planeta Rica;Cumaral;Salamina;Tuquerres;Ocana;Garzon;Manati;Risaralda;San Andres de Sotavento;Anolaima;Toro;Ayapel;Paz de Ariporo;El Bagre;San Vicente de Chucuri;Suan;Girardota;Galapa;Palmira;Umbita;Campoalegre;Pensilvania;Buenaventura;San Agustin;Chinchina;Cumaribo;Albania;Dibulla;Madrid;San Diego;Subachoque;Taraza;Fredonia;Cogua;Chiriguana;Repelon;Landazuri;Briceno;Pital;Guachucal;Yolombo;Cali;Arroyohondo;Buenos Aires;Fomeque;Alvarado;Rivera;Sitionuevo;Belalcazar;Anapoima;San Jose;Barranco de Loba;Marinilla;Santa Rosa del Sur;Jardin;Saboya;Pitalito;Villamaria;Buenavista;La Florida;Ortega;Orito;Vegachi;Mutata;Cocorna;Anzoategui;Ebejico;Lenguazaque;Ciudad Bolivar;Urrao;La Primavera;Yacopi;Chaparral;Barranquilla;Pueblo Nuevo;Riohacha;Corinto;San Antero;La Vega;Yaguara;Sabaneta;Quibdo;Santa Rosa de Osos;San Vicente del Caguan;Inza;Silvania;La Belleza;Regidor;Villa del Rosario;Bugalagrande;San Calixto;Ramiriqui;Quimbaya;Filandia;Andes;Concordia;Falan;Teruel;Medina;Santa Rosa;Caimito;Montenegro;Hacari;Tolu Viejo;Tenerife;Canalete;Nechi;Pailitas;Puerto Caicedo;Duitama;Jamundi;Anserma;Gacheta;Venecia;Lorica;Soledad;Guapi;Gonzalez;Cajibio;Bellavista;Angelopolis;Campo de la Cruz;Tello;Pesca;Dabeiba;Libano;Almaguer;Puebloviejo;Cachipay;Lloro;Santa Fe de Antioquia;Remedios;Belen de los Andaquies;Chimichagua;Guachaves;Oporapa;Ataco;Cordoba;Venadillo;Manizales;Barrancas;Titiribi;Los Palmitos;Calarca;Tamalameque;Samana;Sasaima;El Charco;Saladoblanco;Pinillos;Polonuevo;Bagado;Guamo;Mesetas;Corozal;Luruaco;Icononzo;Cota;Tulua;Paispamba;San Andres;Florencia;Girardot;Santa Ana;Puerto Guzman;Ventaquemada;Solita;Une +TZ;Chake Chake;Zanzibar;Arusha;Bukoba;Vwawa;Mahonda;Bariadi;Shinyanga;Tanzania;Morogoro;Moshi;Mbeya;Mkokotoni;Lindi;Singida;Njombe;Kigoma;Koani;Iringa;Wete;Mpanda;Geita;Mtwara;Mwanza;Kibaha;Dar es Salaam;Sumbawanga;Dodoma;Musoma;Songea;Tabora;Babati;Tanga +SD;Sudan;Singa;Khartoum;Wad Medani;Ed Daein;Rabak;El Obeid;Port Sudan;Kadugli;Ed Damer;El Fasher;Gedaref;Dongola;Nyala;Kassala;El Geneina;Ed Damazin;Zalingei;El Fula +HK;Hong Kong +SA;Medina;Saudi Arabia;Jazan;Al Qurayyat;Abha;Buraydah;Mecca;Riyadh;Sakaka;Al Bahah;Ha'il;Al Wajh;Arar;Ad Dammam;Tabuk;Najran +CL;Cochrane;Pozo Almonte;Osorno;Coquimbo;Cauquenes;Puerto Montt;Puente Alto;Linares;Valparaiso;San Antonio;Chillan;Arica;Iquique;Antofagasta;La Serena;Los Andes;Angol;Chile;Melipilla;Talagante;Lebu;Valdivia;Santiago;La Ligua;Castro;San Felipe;Colina;Calama;Concepcion;Pichilemu;Copiapo;Illapel;San Bernardo;Curico;La Union;Rancagua;Punta Arenas;Los Angeles;Chanaral;Puerto Williams;Coyhaique;Talca;Ovalle;Temuco;Quillota;Tocopilla;Quilpue;Vallenar;Puerto Aysen;Puerto Natales;San Fernando +ES;Merida;Castellon de la Plana;Barcelona;Bilbao;Granada;Guadalajara;Salamanca;Cuenca;Santander;Sevilla;Valladolid;Pamplona;A Coruna;Huelva;Ciudad Real;Lugo;Cordoba;Vitoria-Gasteiz;Girona;Santa Cruz;Pontevedra;Logrono;Badajoz;Jaen;Valencia;Tarragona;Avila;Santiago de Compostela;Cadiz;Oviedo;Palencia;Teruel;Toledo;Lleida;Malaga;Zamora;Albacete;Murcia;Almeria;Ourense;Donostia;Burgos;Caceres;Segovia;Huesca;Soria;Leon;Spain;Zaragoza;Madrid;Alicante;Las Palmas;Palma +IQ;Erbil;Al Kut;Jamjamal;Ar Rutbah;`Aynkawah;Sawran;Al Kufah;Halabjah;Ba`qubah;Ash Shamiyah;Kifri;Anah;Kuysinjaq;Sayyid Sadiq;Ash Shaykhan;Al Hillah;Karbala';Al Hayy;Kirkuk;Al Hindiyah;Qaladizay;Rawah;Makhmur;Baghdad;Abi al Khasib;Al Basrah;Al Hamzah;Samarra';Al Qa'im;Al `Aziziyah;Al Musayyib;Az Zubayr;Iraq;Saymayl;As Suwayrah;Al Fallujah;Mosul;Kalar;Al Faw;Al Hamdaniyah;Shaqlawah;Ar Ramadi;Al `Amarah;Hit;Tall `Afar;Al Jabayish;Tuz Khurmatu;Al Qurnah;An Nasiriyah;Ash Shatrah;`Aqrah;Zakhu;As Samawah;Rawanduz;Al Hashimiyah;Sinjar;Batufah;Al `Amadiyah;An Najaf;An Nu`maniyah;Abu Ghurayb;Dahuk;Balad;Bayji;Khanaqin;Raniyah;As Sulaymaniyah;Al Miqdadiyah;Hadithah;Tikrit;Ad Diwaniyah;Ar Rumaythah +SG;Singapore +CM;Ebolowa;Bafoussam;Buea;Ngaoundere;Cameroon;Bertoua;Garoua;Maroua;Yaounde;Douala;Bamenda +CA;Ottawa;Quebec City;Halifax;Toronto;Victoria;Regina;Iqaluit;Edmonton;Fredericton;Winnipeg;Yellowknife;St. John's;Whitehorse;Canada;Charlottetown +KE;Kisumu;Lodwar;Iten;Kapenguria;Kajiado;Kerugoya;Murang'a;Kenya;Maralal;Wote;Narok;Bungoma;Meru;Migori;Wajir;Homa Bay;Mandera;Embu;Nyamira;Kitale;Kiambu;Ol Kalou;Mombasa;Kitui;Eldoret;Hola;Rumuruti;Marsabit;Kisii;Nakuru;Lamu;Nairobi;Busia;Garissa;Kapsabet;Kwale;Kericho;Kabarnet;Mwatate;Sotik Post;Kakamega;Siaya;Isiolo;Machakos;Nyeri +MM;Hpa-An;Pathein;Loikaw;Monywa;Burma;Myitkyina;Dawei;Magway;Sittwe;Mandalay;Mawlamyine;Rangoon;Taunggyi;Hakha;Nay Pyi Taw;Bago +US;Salem;Sacramento;Helena;Jackson;Harrisburg;Albany;Little Rock;Lincoln;Hartford;Honolulu;Augusta;Boston;Jefferson City;Columbus;Cheyenne;Charleston;Baton Rouge;Bismarck;Concord;Springfield;Des Moines;St. Paul;Pierre;Boise;Atlanta;Dover;Annapolis;Nashville;Providence;Juneau;Denver;Phoenix;Austin;Richmond;Oklahoma City;Montgomery;Lansing;Frankfort;Montpelier;Tallahassee;Carson City;Salt Lake City;Olympia;Topeka;United States;Santa Fe;Madison;Columbia;Washington;Trenton;Indianapolis;Raleigh +AU;Perth;Hobart;Canberra;Brisbane;Adelaide;Australia;Melbourne;Darwin;Sydney +CI;Duekoue;Adzope;Dimbokro;Minignan;Yamoussoukro;Aboisso;Dabakala;Soubre;Sinfra;Toumodi;Gagnoa;Seguela;San-Pedro;Abengourou;Grand-Bassam;Daoukro;Dabou;Bouna;Man;Kong;Daloa;Ferkessedougou;Côte d’Ivoire;Korhogo;Agboville;Biankouma;Sassandra;Bondoukou;Boundiali;Touba;Abidjan;Bangolo;Oume;Bouafle;Mankono;Divo;Bingerville;Jacqueville;Odienne;Katiola;Bongouanou;Guiglo;Grand-Lahou;Sipilou;Bouake +DE;;Meschede;Freiburg im Breisgau;Ansbach;Aichach;Tauberbischofsheim;Landshut;Lauterbach;Sigmaringen;Sommerda;Chemnitz;Kunzelsau;Stadthagen;Greiz;Kleve;Gottingen;Recklinghausen;Parchim;Esslingen;Ratzeburg;Husum;Wolfenbuttel;Hagen;Lorrach;Peine;Hanau;Helmstedt;Cuxhaven;Lichtenfels;Villingen-Schwenningen;Gorlitz;Hassfurt;Mannheim;Germany;Stade;Osterode;Bad Ems;Augsburg;Heidelberg;Siegen;Halle;Bad Kissingen;Luneburg;Pfarrkirchen;Forst (Lausitz);Wittmund;Bad Schwalbach;Neunkirchen;Prenzlau;Neuwied;Kempten;Jena;Hannover;Ravensburg;Detmold;Bad Neuenahr-Ahrweiler;Euskirchen;Aurich;Bad Segeberg;Celle;Bad Salzungen;Hamburg;Schwabisch Hall;Schwelm;Eisenberg;Leer;Gunzburg;Baden-Baden;Emmendingen;Ulm;Limburg;Erlangen;Stralsund;Bayreuth;Wurzburg;Schwandorf;Salzgitter;Munster;Regensburg;Bad Tolz;Bottrop;Ludwigsburg;Haldensleben;Saarbrucken;Pinneberg;Altenburg;Passau;Kronach;Eisenach;Furth;Tirschenreuth;Heilbronn;Cottbus;Biberach;Gelsenkirchen;Bergisch Gladbach;Kaufbeuren;Tubingen;Saarlouis;Merzig;Plon;Balingen;Meppen;Minden;Kassel;Gera;Eberswalde;Straubing;Sonthofen;Bautzen;Germersheim;Frankenthal;Worms;Meiningen;Reutlingen;Erfurt;Pirmasens;Dusseldorf;Nuremberg;Bitburg;Donauworth;Karlsruhe;Miesbach;Herford;Memmingen;Delmenhorst;Neumunster;Greifswald;Duren;Duisburg;Unna;Vechta;Sondershausen;Hamm;Kelheim;Aschaffenburg;Neu-Ulm;Stuttgart;Oldenburg;Dingolfing;Monchengladbach;Zweibrucken;Schleswig;Nordhorn;Wilhelmshaven;Lauf;Potsdam;Senftenberg;Darmstadt;Kiel;Neubrandenburg;Wiesbaden;Waldshut-Tiengen;Mettmann;Torgau;Solingen;Salzwedel;Apolda;Traunstein;Uelzen;Altotting;Bad Homburg;Kulmbach;Bernburg;Hildburghausen;Gummersbach;Lubeck;Bad Kreuznach;Frankfurt;Bamberg;Weissenburg;Remscheid;Luchow;Saalfeld;Borken;Bremen;Weimar;Dresden;Brake;Arnstadt;Gotha;Neumarkt;Montabaur;Jever;Bad Oldesloe;Ebersberg;Speyer;Heinsberg;Perleberg;Freudenstadt;Diepholz;Neuss;Essen;Burg;Trier;Luckenwalde;Schleiz;Magdeburg;Frankfurt (Oder);Boblingen;Berlin;Siegburg;Wetzlar;Ingolstadt;Rathenow;Rottweil;Pfaffenhofen;Calw;Merseburg;Cham;Waiblingen;Bad Hersfeld;Eschwege;Hof;Herne;Neustadt;Wesel;Mainz;Amberg;Eichstatt;Hameln;Sankt Wendel;Neuburg;Fulda;Soest;Ludwigshafen;Wunsiedel;Northeim;Paderborn;Bad Neustadt;Leipzig;Bad Reichenhall;Marktoberdorf;Giessen;Pforzheim;Annaberg-Buchholz;Viersen;Bad Fallingbostel;Homburg;Neuruppin;Heppenheim;Eutin;Gross-Gerau;Rendsburg;Goslar;Coburg;Korbach;Landsberg;Gutersloh;Meissen;Pirna;Braunschweig;Wolfsburg;Schwerin;Wismar;Plauen;Kaiserslautern;Offenburg;Bochum;Forchheim;Naumburg;Stendal;Heidenheim;Bonn;Kitzingen;Munich;Krefeld;Bielefeld;Mulheim;Mosbach;Sangerhausen;Steinfurt;Cloppenburg;Regen;Aachen;Landau;Coesfeld;Miltenberg;Mindelheim;Weilheim;Hoxter;Bergheim;Verden;Koblenz;Leverkusen;Emden;Alzey;Friedberg;Halberstadt;Ingelheim;Bad Durkheim;Heilbad Heiligenstadt;Oranienburg;Muhlhausen;Goppingen;Lubben (Spreewald);Muhldorf;Brandenburg;Furstenfeldbruck;Freiberg;Osterholz-Scharmbeck;Zwickau;Dillingen;Wittlich;Warendorf;Heide;Gifhorn;Hofheim;Winsen;Dortmund;Erbach;Freising;Deggendorf;Holzminden;Hildesheim;Aalen;Osnabruck;Itzehoe;Dachau;Marburg;Schwabach;Wuppertal;Tuttlingen;Sonneberg;Rosenheim;Rastatt;Suhl;Herzberg;Nordhausen;Olpe;Gustrow;Starnberg;Erding;Oberhausen;Ludenscheid;Flensburg;Westerstede +AF;Tarin Kot;Qalat;Panjab;Ghazni;Mama Khel;Imam Sahib;Sangin;Maimanah;Bazarak;Lashkar Gah;Fayroz Koh;Ghoriyan;Maidan Shahr;Bagrami;Qal`ah-ye Now;Farah;Qarqin;Asadabad;Mazar-e Sharif;Baghlan;Nili;Faizabad;Karukh;Baraki Barak;Taywarah;Sar-e Pul;Sharan;Khulm;Zaranj;Kuhsan;Pul-e `Alam;Charikar;Bamyan;Taluqan;Gardez;Kunduz;Afghanistan;Andkhoy;Khanabad;Balkh;Khost;Kabul;Shibirghan;Aqchah;Herat;Jalalabad;Pul-e Khumri;Mahmud-e Raqi;Mehtar Lam;Parun;Kandahar;Aibak +ML;Nioro;Koulikoro;Kolondieba;Bandiagara;Yelimane;Baroueli;Menaka;Sikasso;Kidal;Kadiolo;Mali;Bamako;Dire;Kangaba;Gourma Rharous;Tominian;Djenne;Gao;Youwarou;Kayes;Douentza;Ansongo;Yorosso;San;Banamba;Tenenkou;Bourem;Bafoulabe;Kita;Koro;Niono;Bougouni;Timbuktu;Koutiala;Nara;Mopti;Goundam;Kati;Segou;Tessalit;Bankass;Diema +JO;Ar Ramtha;Al Juwayyidah;Az Zarqa';Ma`an;Muthallath al Azraq;Al `Aqabah;Jordan;As Salt;Busayra;`Ajlun;Al Hisn;Al Jizah;Ash Shunah ash Shamaliyah;Dayr Abu Sa`id;Irbid;Amman;Jarash;Ar Rusayfah;Al Karak;Al Fuhays;Sahab;Madaba;Al Mazar ash Shamali;Al Mafraq;At Tafilah +MA;Guelmim;Marrakech;Beni Mellal;Agadir;Tangier;Morocco;Oujda-Angad;Tifariti;Errachidia;Casablanca;Fes;Rabat +GH;Kumasi;Techiman;Cape Coast;Damongo;Accra;Sunyani;Ho;Bolgatanga;Dambai;Wa;Ghana;Sekondi;Tamale;Koforidua;Goaso;Nalerigu;Sefwi Wiawso +DZ;El Golea;Mila;Tindouf;Chlef;Bordj Bou Arreridj;Relizane;El Oued;Tlemcen;Khenchela;El Tarf;Oum el Bouaghi;Boumerdes;Skikda;Tizi Ouzou;Guelma;Sidi Bel Abbes;Bordj Mokhtar;El Bayadh;Algeria;I-n-Salah;Bechar;Ouled Djellal;Ain Temouchent;Mostaganem;Laghouat;El Meghaier;Adrar;Setif;Algiers;Blida;Jijel;Mascara;Oran;Ghardaia;Touggourt;Ouargla;Tissemsilt;Naama;Djanet;Djelfa;In Guezzam;Saida;M'Sila;Bouira;Souk Ahras;Annaba;Tamanrasset;Beni Abbes;Ain Defla;Biskra;Tebessa;Constantine;Tiaret;Illizi;Medea;Bejaia;Tipasa;Batna;Timimoun +AE;United Arab Emirates;Ra's al Khaymah;Sharjah;Dubai;Abu Dhabi;Umm al Qaywayn;Al Fujayrah;`Ajman +GR;Kaisariani;Salamina;Drama;Giannitsa;Larisa;Tyrnavos;Palaio Faliro;Alimos;Chios;Katerini;Nea Ionia;Kiato;Kalymnos;Agios Dimitrios;Pefki;Greece;Ano Liosia;Spata;Zacharo;Irakleia;Mesolongi;Karpenisi;Kos;Voula;Kalamaria;Patra;Serres;Aliartos;Nea Filadelfeia;Chalandri;Kilkis;Koufalia;Rethymno;Didymoteicho;Aigaleo;Ptolemaida;Argos;Kifisia;Psychiko;Panorama;Edessa;Nea Moudania;Acharnes;Vyronas;Lamia;Rafina;Veroia;Thessaloniki;Ioannina;Ierapetra;Korinthos;Kavala;Agrinio;Chaidari;Thermi;Kerkyra;Polygyros;Agios Nikolaos;Piraeus;Moschato;Alexandroupoli;Kranidi;Gazi;Korydallos;Agia Varvara;Aigio;Ermoupoli;Amaliada;Elefsina;Skydra;Naousa;Agia Paraskevi;Ano Syros;Aspropyrgos;Sindos;Livadeia;Farsala;Peraia;Xanthi;Nea Smyrni;Stavroupoli;Paiania;Mandra;Metamorfosi;Gerakas;Arta;Sparti;Komotini;Argostoli;Tripoli;Peristeri;Sykies;Zografos;Nikaia;Athens;Evosmos;Siteia;Petroupoli;Agioi Anargyroi;Nafpaktos;Mytilini;Dafni;Florina;Argos Orestiko;Kastoria;Perama;Karditsa;Kozani;Volos;Alexandreia;Kalamata;Kallithea;Vrilissia;Irakleio;Koropi;Orestiada;Argyroupoli;Oraiokastro;Marousi;Rodos;Zakynthos;Pyrgos;Galatsi;Amfilochia;Ilioupoli;Thebes;Xylokastro;Trikala;Ilion;Loutraki;Megara;Chrysoupoli;Chania;Nafplio;Grevena;Cholargos;Lagkadas;Ampelokipoi;Melissia;Glyfada;Messini;Kalyvia Thorikou;Lefkada;Keratsini;Preveza;Chalkida +ET;Harar;Ethiopia;Jijiga;Semera;Mekele;Awasa;Asosa;Dire Dawa;Bahir Dar;Addis Ababa;Gambela;Bonga +TW;Taibao;Taitung;Keelung;Zhubei;Banqiao;Changhua;Tainan;Taoyuan District;Pingtung;Douliu;Taipei;Hualien;Jincheng;Magong;Taichung;Yilan;Hsinchu;Nantou;Nangan;Taiwan;Chiayi;Kaohsiung;Miaoli +GT;Pochuta;San Jose El Idolo;San Carlos Alzatate;Concepcion Tutuapa;San Pedro Carcha;Quezaltepeque;San Juan Chamelco;Comitancillo;San Cristobal Totonicapan;Uspantan;El Asintal;Cabrican;Coatepeque;Sacapulas;Tiquisate;Casillas;Patzicia;San Miguel Ixtahuacan;Magdalena Milpas Altas;Santa Catarina Ixtahuacan;Livingston;Rabinal;Jacaltenango;San Juan Cotzal;Teculutan;Huite;Salama;Santa Maria Chiquimula;Santa Catarina Mita;Pasaco;Concepcion Chiquirichapa;San Pedro Sacatepequez;Raxruha;Tacana;Champerico;San Luis Jilotepeque;San Miguel Panan;El Progreso;Huitan;San Vicente Pacaya;San Lucas Sacatepequez;San Jose Ojetenam;Pastores;La Maquina;San Rafael Petzal;Santo Domingo Xenacoj;Santiago Chimaltenango;Quetzaltenango;Jocotan;Santa Ana Huista;Chuarrancho;Jalapa;Yupiltepeque;Concepcion Las Minas;Santo Tomas La Union;Tucuru;San Sebastian Coatan;San Jose Acatempa;Ciudad Vieja;Guanagazapa;Parramos;San Juan Sacatepequez;La Esperanza;Santa Lucia Cotzumalguapa;Santa Cruz Mulua;San Marcos;Tectitan;Zaragoza;San Juan Ixcoy;Mixco;San Juan Bautista;Almolonga;San Sebastian Huehuetenango;San Antonio La Paz;San Bartolome Milpas Altas;San Pedro Pinula;San Juan La Laguna;Chimaltenango;San Gaspar Ixchil;Pueblo Nuevo Vinas;Zacualpa;Coban;San Andres Sajcabaja;Chicacao;Canilla;Nuevo San Carlos;Santa Lucia La Reforma;Cubulco;Santa Clara La Laguna;San Pedro Ayampuc;Santo Domingo Suchitepequez;Santa Cruz del Quiche;San Pedro Jocopilas;Salcaja;Palestina de los Altos;Patulul;Agua Blanca;Ciudad Melchor de Mencos;Concepcion Huista;Cuyotenango;Solola;Poptun;Playa Grande;San Andres Itzapa;Santa Cruz Naranjo;Huehuetenango;San Luis;San Rafael La Independencia;Colotenango;Chichicastenango;El Chal;Escuintla;Ixchiguan;Santa Catarina Pinula;La Blanca;Moyuta;La Gomera;Momostenango;Jocotenango;Villa Canales;Chajul;San Jorge;Esquipulas Palo Gordo;El Chol;San Francisco El Alto;Tecpan Guatemala;Conguaco;Nahuala;Nenton;Cuilapa;San Agustin Acasaguastlan;Alotenango;Samayac;Santa Cruz Verapaz;Chinautla;Cuilco;Santa Lucia Milpas Altas;El Tumbador;Sumpango;Santa Eulalia;Tamahu;Taxisco;Purulha;San Jose Poaquil;San Sebastian;San Jose Pinula;San Antonio Suchitepequez;Cantel;Amatitlan;Joyabaj;Soloma;San Andres Xecul;Chiquimulilla;Comapa;Antigua Guatemala;Chisec;Sipacapa;Fraijanes;Gualan;Pajapita;San Cristobal Cucho;Malacatan;Pachalum;San Pablo Jocopilas;Cajola;Flores;Santa Lucia Utatlan;Totonicapan;San Martin Jilotepeque;Fray Bartolome de Las Casas;Asuncion Mita;Santa Rosa de Lima;San Raimundo;Zapotitlan;Usumatlan;Barberena;Guazacapan;San Lucas Toliman;Petapa;Palin;San Juan Atitan;Palencia;Esquipulas;San Rafael Pie de la Cuesta;Morazan;San Bartolo;Ixtahuacan;Todos Santos Cuchumatan;Acatenango;Senahu;Zunilito;Guatemala;San Mateo Ixtatan;San Pedro La Laguna;Rio Bravo;Olintepeque;Atescatempa;Tajumulco;San Francisco la Union;Ciudad Tecun Uman;San Pablo;Ostuncalco;Santiago Atitlan;El Palmar;San Francisco Zapotitlan;Patzun;San Antonio Palopo;Chiantla;Panzos;Jalpatagua;Panajachel;San Martin Zapotitlan;El Quetzal;Masagua;Santa Maria de Jesus;Lanquin;San Antonio Huista;Sanarate;San Miguel Duenas;El Estor;San Manuel Chaparron;Sibilia;San Francisco;San Martin Sacatepequez;Chinique;San Antonio Sacatepequez;San Andres Villa Seca;San Bernardino;Villa Nueva;Mataquescuintla;Santa Barbara;Jerez;Iztapa;Puerto Barrios;Siquinala;Santa Maria Ixhuatan;Cabanas;San Antonio Aguas Calientes;Flores Costa Cuca;Sibinal;Chicaman;Estanzuela;Chiche;Zunil;Dolores;San Felipe;Sayaxche;Chiquimula;Retalhuleu;Jutiapa;La Libertad;Tactic;San Pedro Necta;San Miguel Acatan;El Tejar;Nueva Concepcion;San Miguel Siguila;San Antonio Ilotenango;La Union;San Jose La Arada;Yepocapa;San Bartolome Jocotenango;Aguacatan;San Juan Ermita;Sansare;San Juan Tecuaco;Zacapa;San Carlos Sija;Monjas;Malacatancito;Santa Cruz Balanya;Olopa;Quesada;San Rafael Las Flores;Guastatoya;Guatemala City;Colomba;Camotan;Santiago Sacatepequez;Morales;San Miguel Chicaj;Mazatenango;Santa Apolonia;Santa Ana;Ocos;Cunen;Nueva Santa Rosa;Los Amates;La Democracia;San Cristobal Verapaz +KW;Kuwait City +HU;Mezotur;Balatonfured;Mateszalka;Karcag;Zalaegerszeg;Oroszlany;Nyirbator;Siklos;Hajduszoboszlo;Monor;Bicske;Sarkad;Varpalota;Hatvan;Balassagyarmat;Sopron;Nyiregyhaza;Kiskunfelegyhaza;Baja;Pecs;Gyomaendrod;Pilisvorosvar;Szarvas;Szekesfehervar;Szombathely;Tata;Gyongyos;Hodmezovasarhely;Sarbogard;Salgotarjan;Nagykallo;Dabas;Budakeszi;Gyal;Mor;Heves;Kiskunmajsa;Sarospatak;Gardony;Vecses;Celldomolk;Nagykata;Godollo;Dunaujvaros;Sarvar;Tatabanya;Tiszavasvari;Tapolca;Szekszard;Barcs;Kazincbarcika;Hajdunanas;Hungary;Erd;Tolna;Komlo;Komarom;Mohacs;Kiskoros;Esztergom;Bekes;Bekescsaba;Debrecen;Kecskemet;Oroshaza;Balmazujvaros;Batonyterenye;Balatonalmadi;Mosonmagyarovar;Dunakeszi;Rackeve;Marcali;Puspokladany;Bonyhad;Mako;Siofok;Edeleny;Szeged;Veszprem;Hajduboszormeny;Gyula;Ajka;Cegled;Eger;Tiszakecske;Kisvarda;Szentes;Jaszbereny;Mezokovesd;Hajduhadhaz;Csongrad;Papa;Szolnok;Kaposvar;Tiszaujvaros;Kormend;Satoraljaujhely;Szigetvar;Paszto;Nagykanizsa;Szentendre;Tiszafured;Ozd;Paks;Budapest;Torokszentmiklos;Gyor;Miskolc;Kalocsa;Kapuvar;Koszeg;Nagykoros;Dombovar;Keszthely;Nagyatad;Vac;Berettyoujfalu;Kiskunhalas;Csorna;Szigetszentmiklos +YE;Lahij;Raydah;Az Zaydiyah;Al Mukalla;Zinjibar;Hajjah;Aden;At Tawahi;Zabid;Yemen;Al Ma`alla';Dhamar;Bayt al Faqih;Bajil;Al Mahwit;Al Hudaydah;Mocha;Ash Shaykh `Uthman;Al Bayda';Say'un;Tarim;Rada`;Ash Shihr;`Amran;Ad Dali`;Ma`bar;Khamir;Hadibu;Al Mansurah;Ataq;Ta`izz;Yarim;Sa`dah;Jiblah;Al Ghayzah;Marib;Al Jabin;Ja`ar;Ibb;Sanaa;Al Hazm +UZ;Chortoq;Marhamat;Termiz;Dang`ara;Yangiariq;Toyloq Qishlog'i;Jondor Shaharchasi;Nukus;Galaosiyo Shahri;Andijon;Jizzax;Buloqboshi;Shahrixon;Yozyovon;Usmat Shaharchasi;Samarkand;Paxtaobod;Paxtakor Shahri;Qanliko`l;Gurlan;Oqtosh Shahri;Do'stlik Shahri;Gagarin Shahri;Qorako`l Shahri;Jomboy Shahri;Oltiariq;Uchquduq Shahri;Toshloq;Payariq Shahri;Kuyganyor;G'oliblar Qishlog'i;Baliqchi;Bukhara;Asaka;Xonqa;Poytug`;Quvasoy;Yangirabot;Juma Shahri;Yangi Mirishkor;Ishtixon Shahri;Vodil;Qo`rg`ontepa;Loyish Shaharchasi;Beshkent Shahri;G'ijduvon Shahri;Qarshi;Xo`jayli Shahri;Uchqurghon Shahri;Taxtako`pir;Qorovul;Vobkent Shahri;Romitan Shahri;Zomin Shaharchasi;Beshariq;G'allaorol Shahri;Namangan;Payshamba Shahri;Kattaqo'rg'on Shahri;Shofirkon Shahri;Hazorasp;Xo`jaobod;Shovot;Qo'shko'pir;Kegeyli Shahar;Farg`ona;Kogon Shahri;Kosonsoy;Uchtepa Qishlog'i;Nurobod Shahri;Yaypan;Urgut Shahri;Tashkent;Uzbekistan;Qorao`zak;Chimboy Shahri;Qorashina;G`uzor;Navoiy;Xiva;Qo`ng`irot Shahri;Quva;Bulung'ur Shahri;Rishton;Haqqulobod;Guliston;Ziyodin Shaharchasi;Bog'ot;Zarbdor Shaharchasi;Urganch +UA;Skvyra;Sevastopol;Henichesk;Zaporizhzhia;Chortkiv;Krasnohrad;Izmail;Ochakiv;Artsyz;Rozdilna;Horlivka;Svitlovodsk;Bershad;Bakhmut;Kalanchak;Bakhchysarai;Smila;Bilohirsk;Novhorod-Siverskyi;Tiachiv;Bila Tserkva;Kobeliaky;Znamianka;Rakhiv;Kiliia;Nikopol;Shyroke;Lviv;Novomyrhorod;Olevsk;Kalmiuske;Chaplynka;Dolyna;Skadovsk;Izium;Kryvyi Rih;Novohrad-Volynskyi;Storozhynets;Tarashcha;Hlobyne;Pokrovsk;Radomyshl;Tulchyn;Kovel;Tokmak;Dubno;Berehove;Uman;Rokytne;Novoazovsk;Piatykhatky;Zolotonosha;Lutsk;Nova Odesa;Tysmenytsia;Bohodukhiv;Kamianka-Dniprovska;Bilhorod-Dnistrovskyi;Vynohradiv;Kalynivka;Okhtyrka;Simferopol;Poltava;Nemyriv;Khmilnyk;Dolynska;Khotyn;Berezivka;Melitopol;Mykolaiv;Bilozerka;Sokyriany;Sumy;Myronivka;Alchevsk;Pervomaisk;Vyshhorod;Reshetylivka;Kosiv;Reni;Bobrynets;Kurman;Berdychiv;Makiivka;Kramatorsk;Khmelnytskyi;Sievierodonetsk;Yampil;Polohy;Zhytomyr;Shakhtarsk;Oleksandriia;Svatove;Berdiansk;Kamianets-Podilskyi;Drohobych;Marhanets;Ukraine;Chuhuiv;Yuzhnoukrainsk;Volnovakha;Kaharlyk;Bilovodsk;Fastiv;Mizhhiria;Dnipro;Kalush;Horodyshche;Chyhyryn;Tlumach;Zolochiv;Yakymivka;Zhmerynka;Lubny;Novoukrainka;Brovary;Berezhany;Hlyboka;Pohrebyshche;Mukacheve;Vesele;Kherson;Zalishchyky;Chervonohrad;Yavoriv;Novyi Buh;Kamin-Kashyrskyi;Snihurivka;Koriukivka;Prymorsk;Mariupol;Liubashivka;Dovzhansk;Enerhodar;Cherkasy;Boryspil;Synelnykove;Sarny;Balta;Nyzhnohirskyi;Novomoskovsk;Chornomorske;Pustomyty;Stryi;Zhydachiv;Kharkiv;Bar;Odesa;Khust;Zvenyhorodka;Saky;Koziatyn;Kyiv;Chernivtsi;Obukhiv;Kupiansk;Starobilsk;Horodenka;Kolomyia;Romny;Sniatyn;Novotroitske;Kerch;Myrhorod;Kozova;Lokhvytsya;Haivoron;Bolhrad;Podilsk;Voznesensk;Oleshky;Illintsi;Konotop;Yevpatoriia;Bucha;Kryzhopil;Pryluky;Luhansk;Mala Vyska;Rivne;Tatarbunary;Kremenchuk;Ivano-Frankivsk;Kropyvnytskyi;Korosten;Khorol;Verkhnodniprovsk;Yany Kapu;Chernihiv;Nizhyn;Apostolove;Bashtanka;Ovidiopol;Kakhovka;Nadvirna;Kamianske;Shostka;Pavlohrad;Vasylivka;Irshava;Donetsk;Tetiiv;Hola Prystan;Kremenets;Biliaivka;Mykhailivka;Uzhhorod;Varash;Ternivka;Dzhankoi;Makariv;Ternopil;Vinnytsia;Perevalsk;Mohyliv-Podilskyi;Sambir;Haisyn;Lozova +BO;Potosi;La Paz;Santa Cruz;Cochabamba;Bolivia;Tarija;Sucre;Cobija;Trinidad;Oruro +KP;Sariwon;Kanggye;Wonsan;Sariwon-si;Pyongyang;Ch'ongjin;Rajin;Korea, North;Hyesan;Hoeryong;Namsan;Sinuiju;Kaesong;Haeju;P'yongsong-si;Hamhung +IT;Trani;Trento;Trieste;Nuoro;Crotone;Vercelli;Florence;Belluno;Genoa;Brindisi;Viterbo;Pavia;Catanzaro;Pesaro;Latina;Como;Caltanissetta;Frosinone;Matera;Rovigo;Gorizia;Pistoia;Ferrara;Avellino;Tempio Pausania;Vicenza;Cagliari;Catania;Forli;Lucca;L'Aquila;Reggio di Calabria;Campobasso;Agrigento;Arezzo;Foggia;Bolzano;Bologna;Naples;Perugia;Massa;Trapani;Mantova;Potenza;Andria;Bergamo;Pescara;Rieti;Lodi;Reggio Emilia;Caserta;Siena;Taranto;Alessandria;Terni;Sondrio;Rimini;Novara;Varese;Italy;La Spezia;Ascoli Piceno;Prato;Messina;Cremona;Padova;Lecce;Brescia;Teramo;Chieti;Villacidro;Livorno;Ragusa;Grosseto;Ancona;Turin;Biella;Lecco;Vibo Valentia;Monza;Fermo;Salerno;Aosta;Savona;Udine;Ravenna;Tortoli;Benevento;Enna;Bari;Macerata;Pordenone;Rome;Modena;Barletta;Verona;Milan;Oristano;Siracusa;Imperia;Sassari;Pisa;Iglesias;Palermo;Olbia;Cosenza;Carbonia;Treviso;Venice;Parma;Cuneo;Piacenza;Verbania;Asti +EC;Loja;Cuenca;Macas;Tulcan;Latacunga;Guayaquil;Azogues;Santa Elena;Santo Domingo de los Colorados;Zamora;Puerto Baquerizo Moreno;Guaranda;Puyo;Machala;Nueva Loja;Riobamba;Ibarra;Babahoyo;Quito;Esmeraldas;Tena;Ambato;Puerto Francisco de Orellana;Portoviejo;Ecuador +SO;Boosaaso;Xuddur;Mogadishu;Baidoa;Buurhakaba;Kismaayo;Qoryooley;Gaalkacyo;Bu'aale;Afgooye;Hobyo;Jawhar;Dhuusamarreeb;Garbahaarrey;Marka;Laascaanood;Hargeysa;Somalia;Boorama;Beledweyne;Ceerigaabo;Garoowe;Burco +SY;Nubl;Ar Rastan;Khan Shaykhun;Darayya;Darat `Izzah;Aleppo;Tall Abyad;Jisr ash Shughur;Jaramana;Hamah;As Suwayda';Suran;Al Qamishli;Sahnaya;Tallkalakh;Al Mayadin;`Afrin;Ash Shaykh Badr;Damascus;Dayr az Zawr;Muhradah;Tall Rif`at;Armanaz;Dayr Hafir;Izra;Harasta;Babila;Maskanah;Ghabaghib;Salkhad;Homs;`Ayn al `Arab;Al Qunaytirah;Ar Raqqah;Busra ash Sham;Kafr Takharim;Syria;Binnish;Al Qutayfah;As Suqaylibiyah;Hish;Yabrud;Al Hajar al Aswad;Jablah;Al `Asharah;Ash Shaddadah;Taftanaz;As Sanamayn;Jarabulus;Tartus;Nawa;Muh Hasan;As Safirah;At Tall;Sarmin;Khirbat Ghazalah;Manbij;Tallbisah;Tadmur;Ath Thawrah;Madaya;Masyaf;Baniyas;Ma`arrat an Nu`man;Tasil;Al Musayfirah;Jindayris;Qadsayya;An Nabk;Ar Ruhaybah;Hajin;Idlib;Ra's al `Ayn;Harim;Kafr Batna;Kafr Nubl;As Sukhnah;Al Qardahah;Dhiban;Al Malikiyah;Shahba;Ad Duraykish;I`zaz;Shin;Al Atarib;Albu Kamal;Mari`;Al Qusayr;Jasim;Kafr Zayta;Safita;Ma`arratmisrin;Salqin;`Amuda;Duma;Al Kiswah;Al Hasakah;Tall Salhab;Al Muzayrib;Latakia;As Salamiyah;Qatana;Qal`at al Madiq;Al Qaryatayn;Dar`a;Dayr `Atiyah;Az Zabadani;Jayrud;Saraqib;Tadif;Saydnaya;Al Ghizlaniyah;Ad Darbasiyah;Mahin +ZM;Lusaka;Choma;Chinsali;Zambia;Ndola;Kasama;Chipata;Mansa;Kabwe;Mongu;Solwezi +BF;Yako;Boromo;Diapaga;Bobo-Dioulasso;Reo;Ziniare;Manga;Gayeri;Kombissiri;Kaya;Boulsa;Batie;Tougan;Koupela;Burkina Faso;Dedougou;Dano;Djibo;Po;Ouahigouya;Bousse;Hounde;Titao;Banfora;Nouna;Zorgo;Pama;Kokologo;Diebougou;Gorom-Gorom;Gourcy;Leo;Aribinda;Bogande;Fada Ngourma;Toma;Sapone;Ouargaye;Sebba;Koudougou;Tenkodogo;Dori;Ouagadougou;Korsimoro;Tanghin-Dassouri;Gaoua;Sapouy;Orodara;Salanso;Kongoussi +LB;Joubb Jannine;Batroun;Amioun;Bcharre;Beirut;Tripoli;Bent Jbail;El Hermel;Nabatiye;Zgharta;Halba;Baalbek;Lebanon;Zahle;Jounie;Tyre;Aaley;Sidon;Baabda;Jbail +RO;Piatra Neamt;Focsani;Bucharest;Vaslui;Cluj-Napoca;Zalau;Craiova;Resita;Galati;Iasi;Satu Mare;Calarasi;Botosani;Ploiesti;Drobeta-Turnu Severin;Baia Mare;Oradea;Giurgiu;Slatina;Romania;Braila;Buzau;Timisoara;Hunedoara;Constanta;Slobozia;Targu Jiu;Sfantu-Gheorghe;Bistrita;Brasov;Miercurea-Ciuc;Suceava;Arad;Alexandria;Tulcea;Targoviste;Targu-Mures;Bacau;Sibiu;Alba Iulia;Pitesti;Ramnicu Valcea +LK;Trincomalee;Anuradhapura;Sri Jayewardenepura Kotte;Ratnapura;Colombo;Sri Lanka;Badulla;Galle;Jaffna;Kandy;Kurunegala +AZ;Agsu;Saatli;Goygol;Qax;Gadabay;Lerik;Babak;Bilasuvar;Naxcivan;Haciqabul;Ordubad;Samux;Agdam;Sarur;Imisli;Barda;Naftalan;Baku;Ucar;Neftcala;Heydarabad;Samkir;Yevlax;Qusar;Azerbaijan;Susa;Astara;Sabirabad;Zaqatala;Quba;Goycay;Ismayilli;Tovuz;Siyazan;Agstafa;Zardab;Oguz;Goranboy;Cabrayil;Xocali;Qubadli;Davaci;Yardimli;Agcabadi;Sumqayit;Tartar;Culfa;Agdas;Lacin;Sahbuz;Qazax;Saki;Lankaran;Mingacevir;Xocavand;Daskasan;Masalli;Xizi;Xacmaz;Salyan;Kalbacar;Qivraq;Ganca;Fuzuli;Xirdalan;Kurdamir;Beylaqan;Calilabad;Qobustan;Qabala;Balakan +MG;Morondava;Fenoarivo Atsinanana;Maevatanana;Manakara;Antsohihy;Mahajanga;Tsiroanomandidy;Farafangana;Antsiranana;Antananarivo;Maintirano;Tolanaro;Ambatondrazaka;Ambovombe;Antsirabe;Sambava;Ambositra;Fianarantsoa;Toliara;Ihosy;Toamasina;Miarinarivo;Madagascar +VE;Machiques;Colon;Rubio;Aragua de Barcelona;Caracas;Guanare;Tariba;Escuque;Puerto Piritu;La Guaira;Barquisimeto;Motatan;El Limon;Ocumare del Tuy;San Antonio de Los Altos;Ciudad Guayana;El Tocuyo;Cabimas;Caucagua;La Colonia Tovar;Baruta;El Consejo;Ciudad Piar;Maiquetia;Montalban;Chichiriviche;Guatire;San Diego;Guacara;La Victoria;Valera;Quibor;Ejido;Turmero;Altagracia de Orituco;San Jose de Aerocuar;Calabozo;Anaco;Cabudare;Cumana;San Carlos;Valencia;Valle de La Pascua;Cantaura;Caraballeda;Monte Carmelo;Petare;El Callao;San Carlos del Zulia;Tocuyito;Piritu;Palo Negro;Punto Fijo;Guanta;Maturin;Lecherias;El Socorro;Bocono;Las Mercedes;Ciudad Bolivar;Villa de Cura;Guasipati;Yaritagua;Barcelona;Carupano;Puerto La Cruz;Chaguaramas;Trujillo;Onoto;Uracoa;Sabaneta;Chivacoa;El Tigre;Charallave;Puerto Cabello;Guasdualito;Los Teques;Santa Cruz;Coro;Tucupita;Higuerote;San Jose de Guaribe;Caripito;La Asuncion;Maracay;Rio Caribe;Maracaibo;Acarigua;Clarines;Barbacoas;Tucacas;Juangriego;Puerto Ayacucho;El Palmar;Ciudad Ojeda;Venezuela;San Antonio del Tachira;Urena;Isla Raton;Porlamar;San Cristobal;Pampan;Carache;San Fernando;Aguasay;Carvajal;Barinas;Merida;La Grita;Santa Maria de Ipire;Carora;Guigue;Santa Barbara;Araure;Upata;Santa Cruz de Los Taques;El Valle del Espiritu Santo;La Esmeralda;Guarenas;Socopo;Catia La Mar;Cagua;Cocorote;Los Guayos;San Felipe;Ospino;Sarare;Santa Rita;Pampatar;Santa Lucia;Puerto Cumarebo;San Juan de los Morros;Pampanito;Achaguas;Santa Teresa del Tuy;Sanare;Nirgua;Cojedes;San Pedro de Coche;Santa Ana;Cua;Zaraza;Tinaquillo;San Mateo +AT;Zell am See;Feldbach;Austria;Imst;Amstetten;Weiz;Bregenz;Neunkirchen;Vocklabruck;Lienz;Bludenz;Deutschlandsberg;Zwettl;Krems an der Donau;Salzburg;Leoben;Dornbirn;Innsbruck;Baden;Klosterneuburg;Judenburg;Eisenstadt;Korneuburg;Sankt Johann im Pongau;Bruck an der Mur;Ganserndorf;Steyr;Gmunden;Vienna;Volkermarkt;Wolfsberg;Feldkirchen;Schwaz;Mistelbach;Voitsberg;Spittal an der Drau;Linz;Hallein;Modling;Sankt Polten;Braunau am Inn;Wiener Neustadt;Kufstein;Klagenfurt;Sankt Veit an der Glan;Ried im Innkreis;Villach;Graz;Wels;Hollabrunn;Leibnitz +ZW;Chinhoyi;Harare;Marondera;Lupane;Mutare;Gweru;Gwanda;Zimbabwe;Bulawayo;Masvingo;Bindura +KH;Sisophon;Tbeng Meanchey;Kampong Speu;Kampong Cham;Kampong Chhnang;Stung Treng;Prey Veng;Svay Rieng;Kampong Trach;Koh Kong;Takeo;Battambang;Sihanoukville;Sen Monorom;Kratie;Kampot;Pailin;Phnom Penh;Lumphat;Suong;Kampong Thom;Cambodia;Krong Kep;Siem Reap;Samraong;Ta Khmau;Banlung;Pursat +SE;Vanersborg;Kavlinge;Skelleftea;Balsta;Timra;Ronneby;Ornskoldsvik;Gavle;Orebro;Gislaved;Ulricehamn;Ystad;Oxelosund;Mjolby;Uddevalla;Harnosand;Ljungby;Arboga;Soderhamn;Sala;Taby;Staffanstorp;Hassleholm;Stockholm;Karlskrona;Enkoping;Sandviken;Karlstad;Molndal;Jonkoping;Mariestad;Tranas;Vasteras;Malmo;Hagfors;Vaxjo;Hallstahammar;Katrineholm;Motala;Uppsala;Eskilstuna;Nynashamn;Hoor;Falun;Skara;Stenungsund;Kalmar;Vetlanda;Bollnas;Hudiksvall;Kungsangen;Vallentuna;Vasterhaninge;Kumla;Kiruna;Koping;Kinna;Skovde;Vimmerby;Ostersund;Upplands Vasby;Sundsvall;Skoghall;Lomma;Lerum;Sunne;Kungsbacka;Lidkoping;Boden;Habo;Molnlycke;Tibro;Tumba;Gothenburg;Borlange;Kristianstad;Fagersta;Falkoping;Linkoping;Alingsas;Varberg;Arvika;Karlskoga;Ekero;Trelleborg;Strangnas;Amal;Akersberga;Sater;Norrtalje;Marsta;Helsingborg;Saffle;Gagnef;Lidingo;Eslov;Svedala;Sodertalje;Orkelljunga;Nykoping;Nybro;Sjobo;Astorp;Angelholm;Halmstad;Lycksele;Oskarshamn;Eksjo;Pitea;Kungalv;Lulea;Norrkoping;Ludvika;Sweden;Filipstad;Solleftea;Umea;Visby;Kristinehamn;Nassjo;Boras;Avesta;Lindesberg;Trollhattan;Gustavsberg;Arlov;Finspang;Hoganas;Almhult;Landskrona;Mora;Vastervik +CU;Ciro Redondo;Colon;Jobabo;Santa Cruz del Sur;Vinales;Jaruco;Ranchuelo;Manati;Palma Soriano;Cardenas;Sagua de Tanamo;Los Arabos;Perico;Limonar;Guira de Melena;Palmira;Minas;Buey Arriba;Jesus Menendez;Jaguey Grande;Majagua;Aguada de Pasajeros;Nueva Gerona;Sancti Spiritus;Campechuela;Candelaria;Guisa;Moron;Najasa;Marti;Guaimaro;Imias;Cacocum;Baguanos;Jovellanos;Bayamo;Manzanillo;Guanajay;San Luis;Pilon;Havana;Cabaiguan;Quemado de Guines;Carlos Manuel de Cespedes;Media Luna;Guantanamo;Bejucal;Madruga;Taguasco;San Jose de las Lajas;San German;Minas de Matahambre;Guines;Yaguajay;Moa;Cuba;Santa Clara;Chambas;Puerto Padre;Esmeralda;Placetas;San Antonio del Sur;Santo Domingo;Nueva Paz;Batabano;Bolivia;Union de Reyes;Lajas;Artemisa;Trinidad;San Nicolas;La Sierpe;Corralillo;Caimito;Camaguey;Santa Cruz del Norte;Jatibonico;Rio Cauto;Sagua la Grande;Colombia;Venezuela;Bartolome Maso;Yara;Quivican;San Cristobal;Consolacion del Sur;Vertientes;Fomento;Remedios;Sibanicu;Manicaragua;Bahia Honda;Mayari;Baracoa;Melena del Sur;Banes;Primero de Enero;Gibara;Contramaestre;Los Palacios;Jiguani;Cayo Mambi;Las Tunas;Alquizar;Cienfuegos;Matanzas;Cruces;Jimaguayu;Rodas;Cifuentes;Pedro Betancourt;Santa Lucia;Caibarien;Santiago de Cuba;Holguin;Ciego de Avila;San Juan y Martinez;Mariel;Cumanayagua;San Antonio de los Banos;La Palma;Antilla;Amancio;Bauta;Pinar del Rio;Camajuani;Calimete;Encrucijada;Florencia;Niquero;Florida +BY;Baranavichy;Rechytsa;Maladzyechna;Bykhaw;Belarus;Babruysk;Mahilyow;Krychaw;Ivanava;Hantsavichy;Mazyr;Yelsk;Rahachow;Chavusy;Ashmyany;Vyetka;Ivatsevichy;Vawkavysk;Minsk;Pruzhany;Svyetlahorsk;Vilyeyka;Khoyniki;Kobryn;Masty;Zhytkavichy;Klimavichy;Hrodna;Zhlobin;Smarhon;Lyuban;Polatsk;Drahichyn;Pinsk;Pastavy;Slonim;Brest;Orsha;Byalynichy;Talachyn;Homyel';Chachersk;Kalinkavichy;Salihorsk;Mstsislaw;Vitsyebsk;Horki;Horad Zhodzina;Lyakhavichy;Shklow;Haradok;Shchuchyn;Hlybokaye;Kastsyukovichy;Braslaw;Kapyl;Malaryta;Valozhyn;Lyelchytsy;Horad Smalyavichy;Staryya Darohi;Dobrush;Luninyets;Nyasvizh;Astravyets;Buda-Kashalyova;Asipovichy;Pyetrykaw;Lahoysk;Slutsk;Horad Barysaw;Klyetsk;Zhabinka;Byaroza +NL;Winterswijk;Rucphen;Den Helder;Valkenswaard;Oldenzaal;Haren;Kapelle;Bedum;Roermond;Putten;Stein;Sliedrecht;Oosterhout;Houten;Montfoort;Malden;Ede;Brielle;Urk;Wierden;Waalre;Burgum;Bergen op Zoom;Papendrecht;Nijmegen;Arnhem;Heerde;Rijen;Neerijnen;Schiedam;Hulst;Oosterwolde;Purmerend;Beek;Bodegraven;Harlingen;Tiel;Middelburg;Lelystad;Huizen;Alkmaar;Middelharnis;Nijverdal;Zwolle;Pijnacker;Tholen;Hengelo;Aalsmeer;Steenbergen;Franeker;Raamsdonksveer;Doesburg;Apeldoorn;Almere;Beuningen;Brummen;Oirschot;Utrecht;Someren;Tilburg;Hardinxveld-Giessendam;Winsum;Sneek;Goes;Deventer;Hoogeveen;Heeze;Zevenaar;Sittard;Uden;Brunssum;Amstelveen;Katwijk;Wijk bij Duurstede;Appingedam;Spijkenisse;Drachten;Elburg;Bladel;Epe;Venray;Wijchen;Beverwijk;Aalten;Groesbeek;Oss;Zutphen;Oisterwijk;Tubbergen;Vlaardingen;Grave;Simpelveld;Hendrik-Ido-Ambacht;Dordrecht;Heemstede;Breda;Cuijk;Rhenen;Hilversum;Sassenheim;Haaksbergen;Doorn;Boxtel;IJmuiden;Waddinxveen;Joure;Delfzijl;Landgraaf;Lochem;Son en Breugel;Geldermalsen;Wolvega;Deurne;Asten;Zandvoort;Gennep;Uitgeest;Terneuzen;Heerlen;Heerhugowaard;IJsselstein;Meerssen;Staphorst;Enkhuizen;Noordwijkerhout;Boekel;Laren;Heerenveen;Rotterdam;Nieuwegein;Leek;Capelle aan den IJssel;Noordwijk;Woerden;Woudrichem;Strijen;Leiden;Oldebroek;Oostzaan;Nuth;Werkendam;Boskoop;Maastricht;Heemskerk;Blaricum;Schagen;Hellevoetsluis;Woudenberg;Horst;Amersfoort;Zundert;Soest;Eemnes;'s-Hertogenbosch;Ermelo;Haarlem;Hillegom;Scherpenzeel;Beilen;Almelo;Best;Zaandam;Duiven;Baarn;Veenendaal;Emmeloord;Amsterdam;Alblasserdam;Voorschoten;Helmond;Bussum;The Hague;Zierikzee;Waalwijk;Bergeijk;Bergen;Volendam;Nederweert;Kampen;Netherlands;Nuenen;Barneveld;Kaatsheuvel;Leeuwarden;Doetinchem;Wassenaar;Landsmeer;Goirle;Haaren;Dongen;Coevorden;Leusden;Veldhoven;Heiloo;Vianen;Monnickendam;Gemert;Zoetermeer;Weert;Emmen;Beek en Donk;Veghel;Harderwijk;Maarssen;Zeewolde;Hoogezand;Denekamp;Uithoorn;Zaltbommel;Hoofddorp;Boxmeer;Opmeer;Oudenbosch;Roelofarendsveen;Veendam;Nunspeet;Kerkrade;Lopik;Krimpen aan den IJssel;Sint-Michielsgestel;Zuidhorn;Dalfsen;Bergambacht;Heesch;Lisse;Leidschendam;Oud-Beijerland;Rijswijk;Eijsden;Vlissingen;Venlo;Leiderdorp;Lemmer;Hoorn;Gouda;Diemen;Schoonhoven;Zeist;Castricum;Meppel;Maassluis;Etten-Leur;Oegstgeest;Bilthoven;Vught;Borne;Sint Anthonis;Stadskanaal;Geldrop;Delft;Zoeterwoude;Eindhoven;Wageningen;Ommen;Made;Naaldwijk;Raalte;Barendrecht;Hilvarenbeek;Sint-Oedenrode;Westervoort;Schinnen;Dronten;Gorinchem;Assen;Losser;Dokkum;Hattem;Voerendaal;Groningen;Oudewater;Roosendaal;Nijkerk;Culemborg;Ridderkerk;Eersel;Enschede;Alphen aan den Rijn;Weesp;Hardenberg;Vaals;Druten;Zwijndrecht +KZ;Taraz;Beyneu;Shiyeli;Qaratau;Shakhtinsk;Ekibastuz;Qashyr;Alga;Aqtobe;Novoishimskiy;Kazakhstan;Shalqar;Turkistan;Taldyqorghan;Qapshaghay;Qarabalyq;Balqash;Khromtau;Soran;Ertis;Kishkenekol;Qazyqurt;Zhitiqara;Zaysan;Kegen;Pavlodar;Oktyabr'sk;Almaty;Qyzylorda;Makhambet;Petropavl;Qaraghandy;Bulaevo;Osakarovka;Bayanauyl;Zhosaly;Saumalkol;Bayghanin;Aral;Shetpe;Sarykemer;Kentau;Atbasar;Esil;Oral;Atyrau;Zhetisay;Aqsu;Semey;Mangghystau;Esik;Qostanay;Qulsary;Inderbor;Borovskoy;Ayagoz;Shymkent;Temirtau;Astana;Aksay;Ushtobe;Bauyrzhan Momyshuly;Qazaly;Shchuchinsk;Oskemen;Zhangatas;Stepnogorsk;Qarabulaq;Fort-Shevchenko;Satbayev;Aqkol;Talghar;Abay;Derzhavinsk;Shemonaikha;Qulan;Qarqaraly;Rudnyy;Taiynsha;Qaskeleng;Uryzhar;Tekeli;Zyryanovsk;Uzynaghash;Lengir;Zhezqazghan;Torghay;Qarazhal;Makinsk;Atasu;Shongzhy;Ereymentau;Aqtau;Shu;Kokshetau;Sarqan;Zharkent;Arys;Balpyq Bi;Sholaqqorghan;Qusmuryn;Shar;Maqat;Sharbaqty;Arqalyq;Chapaev;Usharal;Ridder;Zhanibek +MW;Kasungu;Dowa;Karonga;Ntcheu;Nkhata Bay;Salima;Ntchisi;Lilongwe;Mangochi;Dedza;Phalombe;Nkhotakota;Mchinji;Nsanje;Machinga;Blantyre;Mzimba;Neno;Mwanza;Chitipa;Mulanje;Thyolo;Rumphi;Zomba;Chikwawa;Chiradzulu;Balaka;Malawi +PL;Bedzin;Gdansk;Pulawy;Wejherowo;Mielec;Glogow;Nysa;Zabkowice Slaskie;Bartoszyce;Wolomin;Walbrzych;Swiecie;Olawa;Otwock;Legionowo;Drawsko Pomorskie;Kamien Pomorski;Garwolin;Zywiec;Miedzychod;Grodzisk Mazowiecki;Zabrze;Rypin;Nowy Targ;Boleslawiec;Swiedbodzin;Siedlce;Gryfino;Klobuck;Przasnysz;Busko-Zdroj;Slupsk;Myslowice;Lidzbark Warminski;Koscian;Tychy;Sieradz;Gizycko;Sandomierz;Ostrzeszow;Siemiatycze;Czluchow;Krosno Odrzanskie;Elk;Klodzko;Lublin;Miedzyrzecz;Zuromin;Wolsztyn;Olesnica;Olesno;Lubin;Strzelce Krajenskie;Lask;Puck;Sroda Wielkopolska;Nisko;Dabrowa Tarnowska;Jedrzejow;Stargard Szczecinski;Szczytno;Kalisz;Kedzierzyn-Kozle;Wschowa;Chrzanow;Ruda Slaska;Radomsko;Swidwin;Wyszkow;Wieliczka;Chelm;Sztum;Parczew;Sokolow Podlaski;Krapkowice;Slupca;Ostroleka;Radom;Rybnik;Zakopane;Czarnkow;Ropczyce;Milicz;Aleksandrow Kujawski;Olsztyn;Legnica;Pinczow;Lowicz;Trzebnica;Sierpc;Skarzysko-Kamienna;Mikolow;Augustow;Polkowice;Jaworzno;Znin;Debica;Naklo nad Notecia;Sochaczew;Poland;Sepolno Krajenskie;Lobez;Zyrardow;Gorzow Wielkopolski;Zawiercie;Glubczyce;Kwidzyn;Pyrzyce;Wielun;Suwalki;Leczna;Wabrzezno;Hrubieszow;Nidzica;Wegorzewo;Nowy Tomysl;Warsaw;Rzeszow;Chorzow;Wloclawek;Police;Wagrowiec;Nowe Miasto Lubawskie;Nowy Sacz;Grojec;Zamosc;Tomaszow Mazowiecki;Tomaszow Lubelski;Nowy Dwor Mazowiecki;Janow Lubelski;Kolbuszowa;Mragowo;Sopot;Bielsko-Biala;Zagan;Slubice;Chojnice;Swinoujscie;Tarnobrzeg;Brzeziny;Wieruszow;Luban;Ciechanow;Tczew;Kamienna Gora;Hajnowka;Belchatow;Krotoszyn;Bytow;Mysliborz;Tarnowskie Gory;Gorlice;Plock;Krosno;Bialogard;Inowroclaw;Walcz;Gryfice;Dabrowa Gornicza;Kartuzy;Pruszkow;Pila;Strzelce Opolskie;Grajewo;Poznan;Torun;Koszalin;Chodziez;Raciborz;Lancut;Minsk Mazowiecki;Konskie;Kolo;Limanowa;Swietochlowice;Jawor;Pabianice;Grudziadz;Kutno;Strzelin;Nowa Sol;Prudnik;Grodzisk Wielkopolski;Zambrow;Mogilno;Wlodawa;Kozienice;Mlawa;Piaseczno;Gora;Cieszyn;Zgorzelec;Lubartow;Zory;Bilgoraj;Piekary Slaskie;Opoczno;Turek;Kolobrzeg;Brzeg;Namyslow;Miechow;Oswiecim;Gostyn;Konin;Dzialdowo;Kolno;Gliwice;Bytom;Jaroslaw;Olecko;Ostrowiec Swietokrzyski;Brzesko;Zary;Sucha Beskidzka;Bierun;Lomza;Zlotow;Myszkow;Gdynia;Ozarow Mazowiecki;Swidnik;Staszow;Przeworsk;Ustrzyki Dolne;Lubaczow;Starachowice;Choszczno;Zielona Gora;Rawicz;Srem;Zgierz;Makow Mazowiecki;Olkusz;Ketrzyn;Bochnia;Brodnica;Przemysl;Bydgoszcz;Leszno;Lipno;Wroclaw;Jelenia Gora;Chelmno;Lebork;Lukow;Ostroda;Oborniki;Szczecinek;Krasnik;Ilawa;Ryki;Opole;Pisz;Ostrow Mazowiecka;Golub-Dobrzyn;Lodz;Slawno;Kielce;Sokolka;Goleniow;Wolow;Tuchola;Szydlowiec;Krakow;Pruszcz Gdanski;Piotrkow Trybunalski;Pszczyna;Kepno;Stalowa Wola;Swidnica;Lubliniec;Wloszczowa;Bielsk Podlaski;Siemianowice Slaskie;Pleszew;Jaslo;Elblag;Rawa Mazowiecka;Szczecin;Lezajsk;Pultusk;Krasnystaw;Sulecin;Sosnowiec;Malbork;Wodzislaw Slaski;Goldap;Sanok;Gostynin;Czestochowa;Biala Podlaska;Starogard Gdanski;Myslenice;Wegrow;Zlotoryja;Plonsk;Bialystok;Zdunska Wola;Dzierzoniow;Radzyn Podlaski;Braniewo;Skierniewice;Monki;Tarnow;Wrzesnia;Szamotuly;Kluczbork;Leczyca;Wadowice;Koscierzyna;Ostrow Wielkopolski;Katowice;Wysokie Mazowieckie;Jastrzebie-Zdroj;Sroda Slaska;Nowy Dwor Gdanski;Jarocin;Gniezno +PR;San Juan +CG;Owando;Djambala;Brazzaville;Madingou;Dolisie;Loango;Kinkala;Congo (Brazzaville);Sibiti;Impfondo;Ouesso;Pointe-Noire;Ewo +UY;Montevideo;Mercedes;Treinta y Tres;Rocha;Maldonado;Canelones;Durazno;Rivera;Uruguay;Melo;Tacuarembo;San Jose;Minas;Paysandu;Salto;Fray Bentos;Colonia del Sacramento;Florida;Artigas +HN;La Paz;La Ceiba;Yuscaran;Trujillo;San Pedro Sula;Honduras;Santa Barbara;Ciudad Choluteca;Juticalpa;Tegucigalpa;Puerto Lempira;Santa Rosa de Copan;Roatan;La Esperanza;Nueva Ocotepeque;Comayagua;Gracias;Nacaome;Yoro +UG;Nakasongola;Rukungiri;Kiryandongo;Lwengo;Kasanda;Ntoroko;Lyantonde;Otuke;Gulu;Namutumba;Kotido;Kanungu;Kyenjojo;Soroti;Bukwo;Nabilatuk;Luuka Town;Mitoma;Adjumani;Bundibugyo;Rubirizi;Kabale;Sironko;Busia;Pader;Moyo;Nsiika;Mukono;Kasaali;Maracha;Kiboga;Bulisa;Kalungu;Lira;Kalangala;Kamwenge;Kyankwanzi;Agago;Ntara;Lamwo;Tororo;Luwero;Busesa;Mpigi;Uganda;Bulambuli;Mityana;Kumi;Kibingo;Alebtong;Ngora;Kayunga;Amolatar;Nwoya;Kagadi;Kibiito;Napak;Kiruhura;Serere;Masaka;Pakwach;Kalaki;Kyegegwa;Rubanda;Abim;Manafwa;Amuria;Mbale;Mayuge;Buikwe;Bukomansimbi;Kapchorwa;Jinja;Wakiso;Zombo;Nakapiripirit;Arua;Koboko;Mbarara;Kakumiro;Buyende;Amudat;Yumbe;Kampala;Ibanda;Kanoni;Mubende;Kisoro;Rakai;Binyin;Apac;Butaleja;Iganga;Palenga;Kinoni;Oyam;Katakwi;Butebo;Hoima;Kitamilo;Fort Portal;Kaliro;Kitgum;Gombe;Butalangu;Nebbi;Kibuku;Bugiri;Bushenyi;Mparo;Namayingo;Dokolo;Amuru;Moroto;Kasese;Bududa;Kole;Ntungamo;Sembabule;Isingiro;Kamuli;Kaberamaido;Bupoto;Masindi;Bukedea;Pallisa;Kaabong +GN;Faranah;Kindia;Gueckedou;Fria;Pita;Kissidougou;Koundara;Macenta;Guinea;Gaoual;Mali;Tougue;Mamou;Forecariah;Dinguiraye;Dalaba;Boke;Conakry;Kouroussa;N'Zerekore;Yomou;Boffa;Kerouane;Telimele;Siguiri;Dabola;Beyla;Kankan;Labe +BG;Sofia;Stara Zagora;Montana;Tutrakan;Veliko Tarnovo;Kyustendil;Blagoevgrad;Bulgaria;Dospat;Sliven;Gabrovo;Rakitovo;Burgas;Knezha;Kavarna;Pazardzhik;Zlatograd;Smolyan;Pernik;Kozloduy;Velingrad;Pleven;Vidin;Sozopol;Lovech;Silistra;Panagyurishte;Haskovo;Razgrad;Plovdiv;Peshtera;Balchik;Berkovitsa;Shumen;Yambol;Lom;Svishtov;Varna;Kardzhali;Ruse;Dobrich;Pomorie;Nesebar;Targovishte;Vratsa +CR;San Jose;Costa Rica;Heredia;Liberia;Puntarenas;Alajuela;Cartago;Puerto Limon +RW;Kigali;Byumba;Rwanda;Nyanza;Kibuye;Rwamagana +PA;Colon;Penonome;Chepo;Jaque;Sabanitas;Las Tablas;Puerto Armuelles;Sieyik;Union Choco;Puerto Pilon;Bocas del Toro;Alcalde Diaz;Buabidi;Pacora;Arraijan;Santiago;Ancon;Bajo Boquete;Kusapin;Tocumen;Cativa;Aguadulce;Nuevo Arraijan;Chilibre;Puerto Caimito;Chitre;Changuinola;Gaigirgordub;David;Panama;La Chorrera;La Palma;Almirante;Panama City;El Coco;San Miguelito +SN;Thies;Diourbel;Kolda;Fatick;Ziguinchor;Dakar;Kaolack;Matam;Sedhiou;Saint-Louis;Louga;Tambacounda;Senegal;Kedougou;Kaffrine +OM;`Ibri;Muscat;Hayma';Oman;Sur;Ibra';Salalah;Ar Rustaq;Suhar;Khasab;Nizwa;Al Buraymi;Al Mazyunah +MN;Uliastay;Ulaangom;Baruun-Urt;Mandalgovi;Ulaan-Uul;Ondorhaan;Dzuunmod;Suhbaatar;Hovd;Bayanhongor;Bulgan;Altay;Hoshoot;Mongolia;Choyr;Saynshand;Choybalsan;Tsetserleg;Arvayheer;Erdenet;Dalanzadgad;Darhan;Olgiy;Dalandzadgad;Harhorin;Moron;Ulaanbaatar +IL;Beersheba;Tel Aviv-Yafo;Nazareth;Jerusalem;Haifa;Israel;Ramla +DK;Taastrup;Frederiksvaerk;Ikast;Thisted;Skive;Copenhagen;Esbjerg;Hjorring;Slagelse;Vejle;Vordingborg;Kalundborg;Svendborg;Hedensted;Kolding;Nykobing Mors;Frederikshavn;Holstebro;Silkeborg;Horsholm;Nykobing Falster;Ringsted;Denmark;Holbaek;Herning;Haslev;Hobro;Grenaa;Bronderslev;Koge;Odense;Aarhus;Stovring;Skanderborg;Nyborg;Odder;Varde;Vejen;Horsens;Sonderborg;Naestved;Frederikssund;Helsingor;Soro;Fredericia;Viborg;Randers;Ringkobing;Helsinge;Dragor;Solrod Strand;Ronne;Aabenraa;Struer;Roskilde;Middelfart;Aalborg;Aars;Hillerod;Haderslev;Farum +FI;Hamina;Imatra;Laukaa;Espoo;Savonlinna;Tuusula;Jarvenpaa;Kempele;Kerava;Sipoo;Kontiolahti;Loimaa;Ii;Kirkkonummi;Alavus;Forssa;Lempaala;Laitila;Karkkila;Masku;Liperi;Ylojarvi;Kitee;Eura;Kajaani;Raahe;Somero;Kuusamo;Jyvaskyla;Joensuu;Tornio;Loviisa;Ylivieska;Jamsa;Sodankyla;Hameenkyro;Kalajoki;Sotkamo;Kurikka;Hanko;Helsinki;Orivesi;Orimattila;Eurajoki;Muhos;Kauniainen;Korsholm;Kangasala;Salo;Kankaanpaa;Nokia;Kuopio;Rauma;Turku;Keuruu;Vaasa;Siilinjarvi;Huittinen;Mantsala;Lieksa;Nivala;Pieksamaki;Lappeenranta;Kaarina;Janakkala;Mariehamn;Raisio;Lahti;Alajarvi;Ulvila;Lapua;Kotka;Porvoo;Kokkola;Leppavirta;Hollola;Paimio;Saarijarvi;Mikkeli;Naantali;Jakobstad;Pargas;Iisalmi;Kauhava;Hausjarvi;Kemijarvi;Sastamala;Heinola;Vantaa;Pirkkala;Uusikaupunki;Kemi;Tampere;Kouvola;Hameenlinna;Valkeakoski;Kauhajoki;Narpes;Riihimaki;Varkaus;Rovaniemi;Lapinlahti;Hyvinkaa;Finland;Muurame;Seinajoki;Lohja;Lieto;Raseborg;Pori;Liminka;Ilmajoki;Aanekoski;Oulu +CZ;Plzen;Liberec;Usti nad Labem;Brno;Pardubice;Hradec Kralove;Prague;Jihlava;Zlin;Ostrava;Czechia;Karlovy Vary;Olomouc;Ceske Budejovice +NZ;Stratford;Auckland;Whangarei;Greymouth;Wellington;Dunedin;Napier;New Zealand;Palmerston North;Hamilton;Gisborne;Richmond;Blenheim;Waitangi;Nelson;Invercargill;Christchurch;Whakatane +DO;Monte Plata;San Francisco de Macoris;Salcedo;La Vega;Bani;Jimani;Santo Domingo;La Romana;Sabaneta;San Juan;Nagua;El Seibo;Monte Cristi;Santiago;Samana;Pedernales;Bonao;Moca;Azua;San Jose de Ocoa;Higuey;Comendador;Puerto Plata;San Pedro de Macoris;Hato Mayor;Santo Domingo Este;Neiba;San Cristobal;Dajabon;Dominican Republic;Barahona;Mao;Cotui +PT;Santiago do Cacem;Mirandela;Albergaria-a-Velha;Olhao;Guarda;Rio Maior;Elvas;Caldas da Rainha;Tondela;Lourinha;Alcobaca;Vouzela;Sines;Silves;Beja;Melgaco;Lousa;Coimbra;Fafe;Aljustrel;Batalha;Peso da Regua;Baiao;Ponte de Lima;Montemor-o-Novo;Serta;Alcanena;Tavira;Gouveia;Portimao;Ponta Delgada;Portalegre;Torres Vedras;Macedo de Cavaleiros;Tomar;Celorico de Basto;Vagos;Gondomar;Ovar;Vila Real;Vendas Novas;Viseu;Miranda do Corvo;Moncao;Ansiao;Esposende;Mangualde;Satao;Vila Real de Santo Antonio;Montalegre;Montijo;Lousada;Vila Verde;Cabeceiras de Basto;Tabua;Vila Nova de Gaia;Castelo Branco;Moura;Alcochete;Idanha-a-Nova;Evora;Vila do Conde;Ponte da Barca;Fundao;Castro Daire;Braganca;Cadaval;Odivelas;Amarante;Moimenta da Beira;Agueda;Viana do Castelo;Mealhada;Pombal;Sao Joao da Madeira;Pacos de Ferreira;Almeirim;Loule;Povoa de Varzim;Barreiro;Seixal;Reguengos de Monsaraz;Trofa;Santarem;Salvaterra de Magos;Vila Pouca de Aguiar;Mortagua;Arruda dos Vinhos;Condeixa-a-Nova;Montemor-o-Velho;Sobral de Monte Agraco;Ourem;Benavente;Oliveira de Azemeis;Entroncamento;Sao Pedro do Sul;Abrantes;Bombarral;Chamusca;Odemira;Maia;Sesimbra;Obidos;Oliveira do Hospital;Porto;Valongo;Mafra;Vieira do Minho;Vinhais;Santo Tirso;Ponte de Sor;Povoa de Lanhoso;Cinfaes;Espinho;Vila Nova de Cerveira;Torres Novas;Loures;Arganil;Arcos de Valdevez;Nazare;Grandola;Paredes de Coura;Figueira da Foz;Barcelos;Mira;Alijo;Pinhel;Anadia;Portugal;Resende;Aveiro;Lagoa;Estremoz;Funchal;Arouca;Leiria;Braga;Ilhavo;Trancoso;Chaves;Azambuja;Lisbon;Moita;Sintra;Alcacer do Sal;Valenca;Oliveira do Bairro;Marinha Grande;Murtosa;Porto de Mos;Torre de Moncorvo;Lagos;Paredes;Sabugal;Amares;Penafiel;Matosinhos;Feira;Penacova;Almada;Seia;Felgueiras;Peniche;Setubal;Vila Franca de Xira;Amadora;Soure;Palmela;Serpa;Cantanhede;Oeiras;Estarreja;Coruche;Guimaraes;Sao Bras de Alportel;Cascais;Lamego;Sever do Vouga;Nelas;Cartaxo;Marco de Canavezes;Famalicao;Oliveira de Frades;Faro;Albufeira;Ferreira do Zezere;Alenquer;Vale de Cambra;Covilha;Santa Comba Dao +IE;Limerick;Dun Dealgan;Nenagh;Tralee;Waterford;Port Laoise;Ennis;Tallaght;Tullamore;Castlebar;Monaghan;Ros Comain;Lifford;Dunleary;Swords;Naas;Wexford;Carrick on Shannon;Galway;Wicklow;Carlow;Kilkenny;Ireland;Cork;An Cabhan;Sligo;Clonmel;Dublin;Mullingar;Longford;Trim +BE;Roeselare;Waremme;Wavre;Sint-Niklaas;Maaseik;Mons;Ostend;Marche-en-Famenne;Mouscron;Bruges;Soignies;Aalst;Namur;Ath;Virton;Veurne;Diksmuide;Belgium;Enghien;Eeklo;Philippeville;Gent;Charleroi;Tielt;Oudenaarde;Turnhout;Brussels;Bastogne;Hannut;Kortrijk;Hasselt;Liege;Arlon;Verviers;Thuin;Vilvoorde;Antwerp;Tournai;Tongeren;Nivelles;Mechelen;Ieper;Dendermonde;Huy;Dinant +RS;Presevo;Gadzin Han;Knic;Priboj;Kragujevac;Zrenjanin;Kikinda;Blace;Bajina Basta;Kovacica;Vladicin Han;Senta;Ljubovija;Bojnik;Smederevska Palanka;Nova Varos;Stara Pazova;Kanjiza;Srbobran;Mali Idos;Sabac;Niska Banja;Sremski Karlovci;Pozarevac;Lazarevac;Serbia;Kostolac;Valjevo;Novi Becej;Crna Trava;Vrbas;Razanj;Bela Crkva;Ada;Bosilegrad;Arilje;Rekovac;Indija;Jagodina;Backa Topola;Novi Pazar;Vrnjacka Banja;Bujanovac;Vladimirci;Malo Crnice;Coka;Krupanj;Dimitrovgrad;Becej;Vrsac;Golubac;Pirot;Sid;Surcin;Nova Crnja;Negotin;Merosina;Cacak;Novi Sad;Kraljevo;Nis;Sjenica;Zabalj;Lajkovac;Kula;Lucani;Koceljeva;Vranje;Arandelovac;Plandiste;Kovin;Belgrade;Varvarin;Cuprija;Medveda;Veliko Gradiste;Mali Zvornik;Titel;Sokobanja;Pancevo;Lapovo;Zitorada;Velika Plana;Pozega;Novi Knezevac;Uzice;Petrovac na Mlavi;Trstenik;Bor;Doljevac;Vlasotince;Leskovac;Bela Palanka;Kursumlija;Ljig;Irig;Svilajnac;Sremska Mitrovica;Ruma;Zagubica;Zabari;Prokuplje;Zajecar;Bogatic;Boljevac;Krusevac;Topola;Majdanpek;Babusnica;Backa Palanka;Svrljig;Osecina;Gornji Milanovac;Brus;Petrovaradin;Ivanjica;Aleksinac;Aleksandrovac;Raska;Loznica;Cicevac;Prijepolje;Smederevo;Zemun;Mionica;Zitiste;Kucevo;Cajetina;Despotovac;Beocin;Batocina;Paracin;Kladovo;Ub;Subotica;Trgoviste;Backi Petrovac;Temerin;Bac;Sombor;Apatin;Knjazevac;Secanj;Lebane;Tutin;Kosjeric;Pecinci;Opovo;Mladenovac;Odzaci;Raca;Alibunar;Surdulica +QA;Ash Shihaniyah;Ar Rayyan;Madinat ash Shamal;Umm Salal `Ali;Al Wakrah;Doha;Az Za`ayin;Qatar;Al Khawr +LY;Tobruk;Libya;Al Jawf;Nalut;Ghat;Al Marj;Zuwarah;Al `Aziziyah;Tripoli;Al Bayda';Surt;Sabha;Hun;Murzuq;Awbari;Gharyan;Idri;Misratah;Benghazi;Ajdabiya;Az Zawiyah;Darnah +BI;Bubanza;Bururi;Ngozi;Kayanza;Karuzi;Gitega;Makamba;Cibitoke;Isale;Ruyigi;Burundi;Rumonge;Kirundo;Muyinga;Muramvya;Rutana;Bujumbura;Cankuzo +MZ;Tete;Beira;Maputo;Quelimane;Chimoio;Pemba;Lichinga;Mozambique;Inhambane;Nampula;Xai-Xai +KG;Kulundu;Belovodskoe;Kerben;Kyrgyzstan;Kayyngdy;Kara-Balta;Kochkor;Kemin;Gulcho;Bishkek;Massy;Batken;Naryn;Aravan;Karakol;Baetov;Chuy;Kara-Suu;Jalal-Abad;Kara-Bak;Isfana;Buzhum;Toktogul;Kant;Talas;Eski-Nookat;Kazarman;Ala-Buka;At-Bashy;Teploklyuchenka;Ivanovka;Pokrovka;Osh;Bazar-Korgon;Kyzyl-Adyr;Kyzyl-Suu;Suzak;Bokonbaev;Uch-Korgon;Cholpon-Ata;Tup;Sokuluk;Lebedinovka;Kara-Kulja +GE;Khashuri;Kutaisi;Rustavi;Senaki;Zugdidi;Ozurgeti;Marneuli;Sokhumi;Chiatura;Poti;Zestaponi;Akhaltsikhe;Telavi;Sagarejo;Kaspi;Gori;Batumi;Ambrolauri;Tbilisi;Kobuleti;Mtskheta;Georgia;Samtredia +TD;Doba;Pala;Bongor;Faya;Mongo;Amdjarass;Goz-Beida;Moundou;Sarh;Moussoro;Chad;Massenya;Koumra;Bol;Abeche;N'Djamena;Ati;Biltine;Massakory;Bardai;Am-Timan;Fada;Lai;Mao +MR;Tidjikja;Nema;Kaedi;Dar Naim;Aleg;Mauritania;Tevragh Zeina;Rosso;Nouadhibou;Kiffa;Zouerate;Selibaby;Aioun;Atar;Akjoujt;Nouakchott;Arafat +AM;Ijevan;Artashat;Ashtarak;Gavarr;Armavir;Gyumri;Yerevan;Armenia;Yeghegnadzor;Hrazdan;Vanadzor;Kapan +NO;Skien;Rorvik;Mandal;Os;Porsgrunn;As;Svolvaer;Volda;Randaberg;Namsos;Rygge;Stavanger;Fredrikstad;Bryne;Voss;Hammerfest;Kristiansand;Oslo;Trondheim;Stord;Sandnes;Narvik;Kopervik;Kongsberg;Drammen;Tonsberg;Stjordal;Moss;Brumunddal;Lyngdal;Harstad;Mosjoen;Bodo;Egersund;Nittedal;Finnsnes;Ski;Alesund;Sandvika;Askim;Bergen;Alta;Kongsvinger;Arendal;Sarpsborg;Tromso;Hamar;Stange;Verdal;Lorenskog;Fauske;Haugesund;Sogndal;Kristiansund;Notodden;Grimstad;Lier;Orsta;Farsund;Gjovik;Kirkenes;Nesoddtangen;Forde;Molde;Asker;Elverum;Halden;Sogne;Vadso;Nannestad;Honefoss;Kleppe;Kragero;Vennesla;Lillehammer;Norway;Levanger;Floro;Eidsvoll;Royken;Steinkjer;Mo i Rana +NI;Altagracia;San Lorenzo;Palacaguina;El Almendro;San Isidro;Greytown;Managua;Ocotal;Tisma;Quezalguaque;Nueva Guinea;Bonanza;Nandaime;Masaya;San Ramon;Santa Rosa del Penon;Jalapa;Siuna;Chichigalpa;Rivas;Prinzapolka;Muy Muy;Potosi;El Ayote;Mulukuku;Rancho Grande;San Marcos;Ticuantepe;San Juan de Rio Coco;San Dionisio;Granada;Quilali;Bilwi;Condega;San Rafael del Sur;Esteli;Somotillo;Juigalpa;La Trinidad;Niquinohomo;San Carlos;Villa Sandino;Ciudad Sandino;El Jicaro;Matagalpa;Jinotepe;Jinotega;Tola;Moyogalpa;Muelle de los Bueyes;Waspan;San Miguelito;Pueblo Nuevo;Matiguas;El Tortuguero;Corinto;San Francisco Libre;Santo Domingo;Rio Blanco;Telica;Sebaco;Villa El Carmen;Totogalpa;El Viejo;Esquipulas;Villanueva;Acoyapa;Terrabona;El Realejo;Masatepe;El Cua;El Rama;El Crucero;Belen;San Jose de Bocay;Murra;Kukrahill;Tipitapa;Wiwili;El Jicaral;Camoapa;San Fernando;Comalapa;San Lucas;San Sebastian de Yali;San Rafael del Norte;Bocana de Paiwas;San Juan de Limay;Nagarote;Nicaragua;Rosita;Teustepe;Boaco;El Sauce;Telpaneca;Posoltega;Achuapa;Yalaguina;Ciudad Dario;La Concepcion;Mateare;La Libertad;La Paz Centro;Nindiri;La Cruz de Rio Grande;Somoto;Chinandega;Leon;Diriomo;Bluefields;Nandasmo;San Juan del Sur;Diriamba +TM;Ashgabat;Mary;Anew;Turkmenistan;Balkanabat;Turkmenabat;Dasoguz +NE;Dosso;Maradi;Gaya;Nguigmi;Arlit;Tillaberi;Niger;Tahoua;Niamey;Diffa;Madaoua;Birnin Konni;Agadez;Guidan Roumdji;Goure;Zinder +LR;Robertsport;Monrovia;Harper;Gbarnga;Cestos City;Voinjama;Tubmanburg;Liberia;Zwedru;Sanniquellie;Bopolu;Barclayville;Kakata;Buchanan;Greenville;Fish Town;Bensonville +HT;Jacmel;Haiti;Port-au-Prince;Cap-Haitien;Gonaives;Miragoane;Hinche;Jeremie;Fort Liberte;Les Cayes;Port-de-Paix +ER;Keren;Massawa;Eritrea;Barentu;Assab;Mendefera;Asmara +SL;Freetown;Bo;Sierra Leone;Kenema;Makeni;Port Loko +LA;Salavan;Savannakhet;Louang Namtha;Xam Nua;Ban Houayxay;Pakxe;Laos;Xekong;Xaignabouli;Phon-Hong;Phongsali;Thakhek;Pakxan;Muang Sing;Phonsavan;Attapu;Anouvong;Xai;Vientiane;Louangphabang +LV;Aizkraukle;Liepaja;Kuldiga;Riga;Valmiera;Marupe;Preili;Ogre;Madona;Saulkrasti;Livani;Tukums;Aluksne;Varaklani;Adazi;Saldus;Dobele;Latvia;Rezekne;Valka;Balvi;Kraslava;Kekava;Ludza;Salaspils;Limbazi;Cesis;Ropazi;Sigulda;Daugavpils;Talsi;Olaine;Jurmala;Gulbene;Smiltene;Jekabpils;Ventspils;Bauska;Ulbroka;Jelgava +CF;Bria;Bimbo;Mbaiki;Bangassou;Bouar;Mobaye;Bossangoa;Berberati;Bangui;Kaga Bandoro;Sibut;Obo;Birao;Nola;Central African Republic;Bozoum;Ndele;Bambari +TJ;Istaravshan;Buston;Shahrinav;Somoniyon;Norak;Vakhsh;Hulbuk;Tursunzoda;Fayzobod;Hisor;Vanj;Konibodom;Danghara;Vahdat;Ayni;Dushanbe;Jilikul;Levakant;Khovaling;Moskva;Khorugh;Ghafurov;Obikiik;Abdurahmoni Jomi;Roghun;Shahritus;Dusti;Mu'minobod;Bokhtar;Shurobod;Ghonchi;Shahriston;Panj;Tajikistan;Yovon;Farkhor;Panjakent;Khujand;Isfara;Rasht;Kulob +NP;Godawari;Biratnagar;Pokhara;Janakpur;Butwal;Bhairahawa;Birendranagar;Kathmandu;Nepal;Hetauda +GA;Koulamoutou;Oyem;Makokou;Lambarene;Port-Gentil;Franceville;Gabon;Tchibanga;Mouila;Libreville +HR;Viskovo;Varazdin;Belisce;Pozega;Solin;Slavonski Brod;Pula;Labin;Vrbovec;Kastav;Cepin;Pazin;Metkovic;Podstrana;Kutina;Duga Resa;Novska;Crikvenica;Rovinj;Sisak;Gospic;Popovaca;Imotski;Trogir;Nova Gradiska;Garesnica;Umag;Koprivnica;Ivanec;Zadar;Slatina;Valpovo;Sibenik;Jastrebarsko;Zupanja;Daruvar;Bjelovar;Novi Marof;Vodice;Sveti Ivan Zelina;Dubrovnik;Porec;Sinj;Ivanic-Grad;Brdovec;Dakovo;Knin;Croatia;Ogulin;Cakovec;Pleternica;Zagreb;Osijek;Nasice;Virovitica;Krizevci;Opatija;Matulji;Rijeka;Nedelisce;Velika Gorica;Samobor;Vinkovci;Krapina;Zapresic;Omis;Split;Zabok;Dugo Selo;Vukovar;Makarska;Benkovac;Petrinja +LT;Taurage;Pagegiai;Kaunas;Kedainiai;Radviliskis;Birstonas;Visaginas;Silute;Zarasai;Salcininkai;Joniskis;Elektrenai;Ignalina;Plunge;Prienai;Sakiai;Skuodas;Naujoji Akmene;Kazlu Ruda;Vilkaviskis;Svencionys;Kaisiadorys;Klaipeda;Lazdijai;Mazeikiai;Rokiskis;Kelme;Marijampole;Silale;Druskininkai;Utena;Anyksciai;Varena;Pakruojis;Vilnius;Lithuania;Raseiniai;Alytus;Ukmerge;Trakai;Siauliai;Kupiskis;Telsiai;Rietavas;Jurbarkas;Birzai;Kalvarija;Nida;Panevezys;Moletai;Pasvalys;Palanga;Jonava;Sirvintos;Kretinga +MD;Telenesti;Comrat;Criuleni;Nisporeni;Moldova;Cocieri;Drochia;Briceni;Soldanesti;Calarasi;Floresti;Causeni;Hincesti;Soroca;Balti;Edinet;Basarabeasca;Glodeni;Chisinau;Cantemir;Falesti;Stefan Voda;Taraclia;Donduseni;Riscani;Ialoveni;Anenii Noi;Leova;Singerei;Cimislia;Bender;Tiraspol;Orhei;Ungheni;Rezina;Straseni;Ocnita;Cahul +PG;Kundiawa;Port Moresby;Wewak;Alotau;Madang;Buka;Wabag;Vanimo;Tari;Lorengau;Kimbe;Goroka;Papua New Guinea;Daru;Popondetta;Kurumul;Kokopo;Lae;Kavieng;Mendi;Kerema;Mount Hagen +BJ;Ouidah;Kandi;Savalou;Parakou;Abomey;Djougou;Natitingou;Porto-Novo;Sakete;Lokossa;Benin;Dogbo;Cotonou +BH;Manama +EE;Kohtla-Jarve;Narva;Johvi;Paide;Rakvere;Tartu;Tallinn;Kardla;Viljandi;Parnu;Keila;Sillamae;Polva;Estonia;Jogeva;Rapla;Valga;Haapsalu;Kuressaare;Maardu;Voru +DJ;Dikhil;Djibouti;Ali Sabieh;Obock;Arta;Tadjourah +TN;Kasserine;Monastir;Kairouan;Sidi Bouzid;Gafsa;Sousse;Ben Arous;Beja;Tataouine;Manouba;El Kef;Medenine;Zaghouan;Jendouba;Kebili;Gabes;Ariana;Tunis;Bizerte;Tozeur;Sfax;Tunisia;Siliana;Mahdia;Nabeul +XG;Gaza +JM;Falmouth;Lucea;Port Antonio;Morant Bay;Kingston;Spanish Town;Jamaica;Port Maria;Savanna-la-Mar;Mandeville;May Pen;Half Way Tree;Black River;Saint Ann's Bay;Montego Bay +MK;Zelenikovo;Brvenica;Vrapciste;Makedonska Kamenica;Novo Selo;Bogovinje;Cucer-Sandevo;Rosoman;Kicevo;Kocani;Gostivar;Lipkovo;Vasilevo;Stip;Petrovec;Jegunovce;Konce;Pehcevo;Ilinden;Gradsko;North Macedonia;Bogdanci;Plasnica;Lozovo;Star Dojran;Kratovo;Centar Zupa;Radovis;Debar;Kumanovo;Ohrid;Vevcani;Sopiste;Rankovce;Sveti Nikole;Veles;Tearce;Vinica;Makedonski Brod;Probistip;Kavadarci;Zrnovci;Rostusa;Prilep;Aracinovo;Berovo;Zelino;Bosilovo;Struga;Mogila;Studenicani;Novaci;Strumica;Tetovo;Belcista;Bitola;Demir Kapija;Resen;Gevgelija;Dolneni;Krusevo;Valandovo;Skopje;Kriva Palanka;Demir Hisar;Staro Nagoricane;Delcevo;Krivogastani;Negotino;Karbinci;Oblesevo +GW;Cacheu;Guinea-Bissau;Quinhamel;Bissau;Buba;Gabu;Farim;Catio;Bolama;Bafata +MT;Victoria;Lija;Safi;Valletta;Ghasri;Gzira;Ghaxaq;Ghajnsielem;Mellieha;Tarxien;Xewkija;Imtarfa;Mosta;San Giljan;Siggiewi;Iklin;Qormi;Qrendi;Ta' Xbiex;Santa Lucija;Imsida;Gharb;Nadur;Birzebbuga;Xaghra;Marsaskala;Saint Paul's Bay;Imdina;Gudja;Sliema;Swieqi;Marsaxlokk;Attard;San Gwann;Floriana;Birkirkara;Fontana;Imqabba;Vittoriosa;Senglea;Marsa;Qala;Munxar;Dingli;Balzan;Cospicua;Fgura;Luqa;Zurrieq;San Lawrenz;Zabbar;Imgarr;Rabat;Pieta;Kalkara;Gharghur;Naxxar;Sannat;Malta;Hamrun;Kercem;Zebbug;Kirkop;Zejtun;Pembroke;Xghajra;Santa Venera;Paola +PY;Caazapa;Fuerte Olimpo;Filadelfia;Villa Hayes;Pilar;San Pedro de Ycuamandiyu;Aregua;Caacupe;Ciudad del Este;Pedro Juan Caballero;Concepcion;Asuncion;Encarnacion;Paraguari;Villarrica;Paraguay;San Juan Bautista;Salto del Guaira;Coronel Oviedo +SK;Hlohovec;Poprad;Nove Zamky;Dunajska Streda;Presov;Senica;Ziar nad Hronom;Sabinov;Malacky;Rimavska Sobota;Detva;Vel'ky Krtis;Michalovce;Humenne;Piest'any;Trebisov;Bratislava;Kysucke Nove Mesto;Pezinok;Zvolen;Liptovsky Mikulas;Senec;Partizanske;Zilina;Svidnik;Puchov;Vranov nad Topl'ou;Zlate Moravce;Trencin;Ruzomberok;Banska Stiavnica;Bardejov;Dolny Kubin;Nitra;Lucenec;Revuca;Slovakia;Topol'cany;Spisska Nova Ves;Galanta;Snina;Skalica;Cadca;Nove Mesto nad Vahom;Prievidza;Povazska Bystrica;Banska Bystrica;Levoca;Stropkov;Kosice;Bytca;Levice;Brezno;Trnava;Sal'a;Kezmarok;Tvrdosin;Banovce nad Bebravou;Myjava;Martin;Roznava;Stara L'ubovna +SS;Wau;South Sudan;Bor;Malakal;Rumbek;Aweil;Juba;Kuacjok;Yambio;Torit;Bentiu +CH;Aarau;Altdorf;Rheinfelden;Kussnacht;Switzerland;Einsiedeln;Solothurn;Rapperswil-Jona;Brig-Glis;Herisau;Rorschach;Yverdon-les-Bains;Geneva;Sankt Gallen;Schaffhausen;Wil;Hinwil;Chur;La Chaux-de-Fonds;Bern;Weinfelden;Lachen;Pfaffikon;Bremgarten;Glarus;Frauenfeld;Renens;Winterthur;Basel;Sion;Schwyz;Thun;Lausanne;Uster;Stans;Appenzell;Kreuzlingen;Altstatten;Davos;Sursee;Bellinzona;Zug;Lucerne;Neuchatel;Fribourg;Langnau;Liestal;Delemont;Sarnen;Zurich +NA;Otjiwarongo;Windhoek;Keetmanshoop;Oshakati;Eenhana;Gobabis;Outapi;Nkurenkuru;Omuthiya;Katima Mulilo;Opuwo;Rundu;Namibia;Mariental;Swakopmund +BA;Gradacac;Zepce;Banja Luka;Celic;Donji Vakuf;Kladanj;Doboj;Ilijas;Gacko;Zivinice;Jajce;Gracanica;Bileca;Odzak;Gradiska;Vares;Bugojno;Cazin;Tesanj;Lopare;Brcko;Vitez;Kljuc;Sarajevo;Posusje;Bihac;Tuzla;Vogosca;Banovici;Rogatica;Srebrenik;Prijedor;Visoko;Kiseljak;Bijeljina;Srbac;Fojnica;Olovo;Stolac;Bratunac;Sapna;Srebrenica;Vlasenica;Pale;Bosanska Krupa;Mostar;Lukavac;Maglaj;Breza;Capljina;Siroki Brijeg;Busovaca;Gorazde;Gornji Vakuf;Trebinje;Jablanica;Bosnia and Herzegovina;Travnik;Citluk;Livno;Novi Travnik;Orasje;Milici;Zenica;Zvornik;Sanski Most;Hadzici;Ilidza;Foca;Ljubuski;Derventa;Brod;Novi Grad +AL;Shkoder;Peshkopi;Albania;Lezhe;Vlore;Berat;Durres;Kukes;Korce;Elbasan;Gjirokaster;Fier;Tirana +GM;Mansa Konko;Basse Santa Su;Brikama;Kanifing;Kerewan;Gambia, The;Janjanbureh;Banjul +LS;Thaba-Tseka;Qacha's Nek;Mohale's Hoek;Mafeteng;Teyateyaneng;Quthing;Mokhotlong;Butha-Buthe;Lesotho;Leribe;Maseru +CY;Famagusta;Kyrenia;Cyprus;Larnaca;Limassol;Paphos;Nicosia +SV;Zacatecoluca;Sonsonate;Sensuntepeque;La Union;El Salvador;Ahuachapan;Santa Tecla;Chalatenango;San Francisco;San Vicente;San Miguel;Usulutan;Santa Ana;San Salvador;Cojutepeque +RE;Saint-Denis +GQ;Pale;Luba;Djibloho;Evinayong;Mongomo;Ebebiyin;Malabo;Equatorial Guinea;Bata +SI;Ljubljana;Vojnik;Piran;Slovenska Bistrica;Sredisce ob Dravi;Dobrovnik;Skofljica;Vransko;Race;Kamnik;Dragomer;Naklo;Starse;Trzin;Brezice;Sempeter pri Gorici;Nazarje;Kocevje;Prevalje;Grad;Lasko;Zgornja Hajdina;Radece;Ig;Gornja Radgona;Bohinjska Bistrica;Slovenske Konjice;Benedikt;Vrhnika;Podlehnik;Sveta Trojica v Slovenskih Goricah;Gornji Petrovci;Smarjeske Toplice;Trbovlje;Kidricevo;Polzela;Moravce;Velike Lasce;Cerkno;Kuzma;Cerklje na Gorenjskem;Markovci;Lukovica;Jesenice;Nova Gorica;Odranci;Recica;Destrnik;Apace;Salovci;Bistrica ob Sotli;Gorenja Vas;Kozje;Podcetrtek;Braslovce;Vitomarci;Hodos;Velenje;Beltinci;Trzic;Ravne na Koroskem;Mirna Pec;Mirna;Cirkulane;Vipava;Selnica ob Dravi;Majsperk;Videm pri Ptuju;Mislinja;Nova Vas;Verzej;Jursinci;Sencur;Bled;Ilirska Bistrica;Straza;Sevnica;Ziri;Maribor;Kranjska Gora;Sentrupert;Medvode;Ankaran;Spodnje Hoce;Komen;Dravograd;Zelezniki;Puconci;Izola;Krizevci;Crnomelj;Cerknica;Rogasovci;Trebnje;Metlika;Borovnica;Kranj;Gornji Grad;Kostanjevica na Krki;Vitanje;Prebold;Semic;Zagorje;Sodrazica;Sostanj;Crna na Koroskem;Zgornja Kungota;Videm;Brezovica;Ajdovscina;Radenci;Radlje ob Dravi;Lovrenc na Pohorju;Logatec;Muta;Sentjur;Luce;Kobilje;Turnisce;Skofja Loka;Solcava;Sentilj;Postojna;Mezica;Poljcane;Ljubno;Store;Zgornje Gorje;Zalec;Velika Polana;Pesnica;Dobrna;Novo Mesto;Ormoz;Vuzenica;Slovenia;Osilnica;Celje;Zgornje Jezersko;Tabor;Ljutomer;Skocjan;Smartno;Ribnica;Komenda;Oplotnica;Rogatec;Dol;Trnovska Vas;Krsko;Domzale;Zetale;Jurovski Dol;Podvelka;Ptuj;Sezana;Kostel;Kobarid;Tolmin;Smarje;Ruse;Zavrc;Miklavz na Dravskem Polju;Slovenj Gradec;Litija;Rogaska Slatina;Razkrizje;Miren;Horjul;Dobrova;Preddvor;Murska Sobota;Bovec;Mokronog;Tisina;Menges;Zrece;Kanal;Divaca;Gorisnica;Cankova;Crensovci;Zuzemberk;Stari Trg;Koper;Dolenjske Toplice;Ivancna Gorica;Spodnji Duplek;Vodice;Hrastnik;Tomaz pri Ormozu;Dobrovo;Hrib-Loski Potok;Lenart v Slovenskih Goricah;Cerkvenjak;Sveta Ana;Sveti Jurij;Dornava;Makole;Moravske-Toplice;Mozirje;Sentjernej;Lendava;Dobje;Pivka;Grosuplje;Idrija;Radovljica +BS;Nassau +MQ;Fort-de-France +BW;Selibe Phikwe;Kasane;Kanye;Masunga;Maun;Jwaneng;Serowe;Lobatse;Gaborone;Tshabong;Francistown;Botswana;Mochudi;Sowa Town;Molepolole;Ghanzi;Ramotswa +SR;Groningen;Onverwacht;Totness;Lelydorp;Albina;Nieuw Nickerie;Brokopondo;Paramaribo;Nieuw Amsterdam;Suriname +TL;Viqueque;Same;Manatuto;Lospalos;Baucau;Aileu;Timor-Leste;Maliana;Pante Macassar;Dili;Ainaro;Gleno;Suai;Liquica +XK;Kamenice;Peje;Lipjan;Istog;Podujeve;Pristina;Junik;Ranillug;Mitrovice;Gjilan;Shtime;Hani i Elezit;Ferizaj;Zubin Potok;Kosovo;Rahovec;Kllokot;Partesh;Gracanice;Mamushe;Kline;Decan;Fushe Kosove;Malisheve;Suhareke;Shterpce;Dragash;Prizren;Viti;Obiliq;Kacanik;Gjakove;Leposaviq;Zvecan;Vushtrri;Novoberde;Gllogovc;Skenderaj +GY;Vreed-en-Hoop;New Amsterdam;Lethem;Mahdia;Fort Wellington;Linden;Georgetown;Guyana;Mabaruma;Bartica +FJ;Suva +NC;We;Noumea;Kone;New Caledonia +ME;Savnik;Cetinje;Zabljak;Golubovci;Ulcinj;Rozaje;Herceg Novi;Pluzine;Danilovgrad;Plav;Pljevlja;Petnjica;Tivat;Andrijevica;Gusinje;Niksic;Montenegro;Budva;Bijelo Polje;Bar;Kolasin;Tuzi;Podgorica;Kotor;Mojkovac;Berane +CW;Willemstad +MU;Port Louis +IS;Reykjavik +MV;Viligili;Foammulah;Dhihdhoo;Hithadhoo;Felidhoo;Kudahuvadhoo;Rasdhoo;Eydhafushi;Veymandoo;Funadhoo;Thinadhoo;Fonadhoo;Mahibadhoo;Male;Muli;Nilandhoo;Thulusdhoo;Kulhudhuffushi;Manadhoo;Maldives;Ungoofaaru;Naifaru +LU;Capellen;Vianden;Echternach;Luxembourg;Grevenmacher;Remich;Mersch;Redange-sur-Attert;Clervaux;Esch-sur-Alzette;Dudelange;Wiltz;Diekirch;Differdange +PF;Papeete +BT;Daga;Samdrup Jongkhar;Lhuentse;Wangdue Phodrang;Samtse;Zhemgang;Bhutan;Tsirang;Thimphu;Sarpang;Mongar;Trongsa;Tsimasham;Paro;Jakar;Pemagatshel;Haa;Trashigang;Trashi Yangtse;Gasa;Punakha +TG;Atakpame;Sokode;Kara;Togo;Dapaong;Lome +SZ;Eswatini;Mbabane;Nhlangano;Lobamba;Manzini;Siteki +BB;Bridgetown +TT;Couva;Tunapuna;Scarborough;Rio Claro;Point Fortin;Port of Spain;Aranguez;Trinidad and Tobago;Siparia;Debe;Arima;San Fernando;Diego Martin;Princes Town;Chaguanas;Sangre Grande +SB;Honiara;Buala;Taro;Tigoa;Tulagi;Kirakira;Solomon Islands;Gizo;Auki;Lata +YT;Mamoudzou +CV;Espargos;Assomada;Calheta de Sao Miguel;Cidade Velha;Sao Filipe;Tarrafal;Praia;Pombas;Porto Ingles;Porto Novo;Mindelo;Cova Figueira;Joao Teves;Ponta do Sol;Pedra Badejo;Ribeira Brava;Sal Rei;Igreja;Picos;Ribeira Grande;Sao Domingos;Cabo Verde;Nova Sintra +LC;Castries +ST;Sao Tome and Principe;Santo Antonio;Sao Tome;Neves;Santana;Guadalupe;Trindade;Sao Joao dos Angolares +GF;Saint-Laurent-du-Maroni;Saint-Georges;Kourou;Sinnamary;Cayenne;Roura;French Guiana;Iracoubo +BZ;Corozal;San Ignacio;Belize City;Punta Gorda;Belmopan;Orange Walk;Belize;Dangriga +BN;Brunei;Bangar;Tutong;Kuala Belait;Bandar Seri Begawan +VU;Isangel;Lakatoro;Luganville;Saratamata;Vanuatu;Port-Vila;Sola +WS;Samoa;Saleaula;Safotulafai;Safotu;Lufilufi;Samamea;Satupa`itea;Apia;Vailoa;Mulifanua;Afega;Asau;Leulumoega +MC;Monaco +AW;Oranjestad +GI;Gibraltar +JE;Saint Helier +MH;Majuro +KM;Mutsamudu;Fomboni;Moroni;Comoros +KI;Tarawa +IM;Douglas +KY;George Town +SC;Victoria +TO;Nuku`alofa +AD;La Massana;Canillo;Ordino;Andorra;Escaldes-Engordany;Encamp;Andorra la Vella;Sant Julia de Loria +AG;Saint John's +GG;Saint Peter Port +GL;Qaqortoq;Nuuk;Sisimiut;Ilulissat;Aasiaat;Greenland +DM;Roseau +VC;Kingstown +FM;Kolonia;Colonia;Tofol;Palikir;Micronesia, Federated States of;Weno +FO;Sorvagur;Kunoy;Oyrarbakki;Skalavik;Fuglafjordhur;Toftir;Kirkja;Skopun;Kvivik;Sandur;Saltangara;Tvoroyri;Skuvoy;Hov;Vestmanna;Famjin;Torshavn;Husavik;Hvalba;Sandavagur;Klaksvik;Vagur;Hvannasund;Strendur;Sumba;Faroe Islands;Porkeri;Nordhragota;Vidhareidhi;Eidhi +KN;Basseterre +VG;Road Town +AS;Pago Pago +SM;Faetano;Serravalle;Borgo Maggiore;Fiorentino;San Marino;Domagnano;Montegiardino;Acquaviva;Chiesanuova +BQ;Oranjestad;The Bottom;Bonaire, Sint Eustatius, and Saba;Kralendijk +TV;Funafuti +LI;Balzers;Triesenberg;Ruggell;Schellenberg;Mauren;Triesen;Vaduz;Planken;Schaan;Eschen;Liechtenstein;Gamprin +MF;Marigot +PM;Saint-Pierre +GD;Saint George's +CK;Avarua +TC;Grand Turk +AI;The Valley +MP;Capitol Hill +BL;Gustavia +FK;Stanley +SX;Philipsburg +XR;Longyearbyen +CX;Flying Fish Cove +GU;Hagta +WF;Leava;Wallis and Futuna;Mata-Utu +BM;Hamilton +VA;Vatican City +NR;Yaren +SH;Edinburgh of the Seven Seas;Saint Helena, Ascension, and Tristan da Cunha;Jamestown;Georgetown +NU;Alofi +MS;Montserrat;Brades;Plymouth +NF;Kingston +GP;Basse-Terre +PW;Ngerulmud +PN;Adamstown +GS;King Edward Point +VI;Charlotte Amalie \ No newline at end of file diff --git a/dhp-pace-core/src/main/spark-2/eu/dnetlib/pace/util/SparkCompatUtils.scala b/dhp-pace-core/src/main/spark-2/eu/dnetlib/pace/util/SparkCompatUtils.scala new file mode 100644 index 000000000..a426703d6 --- /dev/null +++ b/dhp-pace-core/src/main/spark-2/eu/dnetlib/pace/util/SparkCompatUtils.scala @@ -0,0 +1,12 @@ +package eu.dnetlib.pace.util + +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, RowEncoder} +import org.apache.spark.sql.types.StructType + +object SparkCompatUtils { + + def encoderFor(schema: StructType): ExpressionEncoder[Row] = { + RowEncoder(schema) + } +} \ No newline at end of file diff --git a/dhp-pace-core/src/main/spark-35/eu/dnetlib/pace/util/SparkCompatUtils.scala b/dhp-pace-core/src/main/spark-35/eu/dnetlib/pace/util/SparkCompatUtils.scala new file mode 100644 index 000000000..cbc454ae2 --- /dev/null +++ b/dhp-pace-core/src/main/spark-35/eu/dnetlib/pace/util/SparkCompatUtils.scala @@ -0,0 +1,12 @@ +package eu.dnetlib.pace.util + +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.types.StructType + +object SparkCompatUtils { + + def encoderFor(schema: StructType): ExpressionEncoder[Row] = { + ExpressionEncoder(schema) + } +} diff --git a/dhp-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java b/dhp-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java index 80e349a3f..e62f742f8 100644 --- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java +++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/clustering/ClusteringFunctionTest.java @@ -8,6 +8,7 @@ import org.junit.jupiter.api.Test; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.mongodb.connection.Cluster; import eu.dnetlib.pace.AbstractPaceTest; import eu.dnetlib.pace.common.AbstractPaceFunctions; @@ -177,41 +178,16 @@ public class ClusteringFunctionTest extends AbstractPaceTest { } @Test - public void testKeywordsClustering() { + public void legalnameClustering() { - final ClusteringFunction cf = new KeywordsClustering(params); - final String s = "Polytechnic University of Turin"; + final ClusteringFunction cf = new LegalnameClustering(params); + String s = "key::1 key::2 city::1"; System.out.println(s); System.out.println(cf.apply(conf, Lists.newArrayList(s))); - final String s1 = "POLITECNICO DI TORINO"; - System.out.println(s1); - System.out.println(cf.apply(conf, Lists.newArrayList(s1))); - - final String s2 = "Universita farmaceutica culturale di milano bergamo"; - System.out.println("s2 = " + s2); - System.out.println(cf.apply(conf, Lists.newArrayList(s2))); - - final String s3 = "universita universita milano milano"; - System.out.println("s3 = " + s3); - System.out.println(cf.apply(conf, Lists.newArrayList(s3))); - - final String s4 = "Politechniki Warszawskiej (Warsaw University of Technology)"; - System.out.println("s4 = " + s4); - System.out.println(cf.apply(conf, Lists.newArrayList(s4))); - - final String s5 = "İstanbul Ticarət Universiteti"; - System.out.println("s5 = " + s5); - System.out.println(cf.apply(conf, Lists.newArrayList(s5))); - - final String s6 = "National and Kapodistrian University of Athens"; - System.out.println("s6 = " + s6); - System.out.println(cf.apply(conf, Lists.newArrayList(s6))); - - final String s7 = "Εθνικό και Καποδιστριακό Πανεπιστήμιο Αθηνών"; - System.out.println("s7 = " + s7); - System.out.println(cf.apply(conf, Lists.newArrayList(s7))); - + s = "key::1 key::2 city::1 city::2"; + System.out.println(s); + System.out.println(cf.apply(conf, Lists.newArrayList(s))); } @Test diff --git a/dhp-pace-core/src/test/java/eu/dnetlib/pace/common/PaceFunctionTest.java b/dhp-pace-core/src/test/java/eu/dnetlib/pace/common/PaceFunctionTest.java index 7fd81d975..4ec120f4a 100644 --- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/common/PaceFunctionTest.java +++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/common/PaceFunctionTest.java @@ -54,4 +54,47 @@ public class PaceFunctionTest extends AbstractPaceFunctions { System.out.println("Fixed aliases : " + fixAliases(TEST_STRING)); } + @Test + public void countryInferenceTest() { + assertEquals("IT", countryInference("UNKNOWN", "Università di Bologna")); + assertEquals("UK", countryInference("UK", "Università di Bologna")); + assertEquals("IT", countryInference("UNKNOWN", "Universiteé de Naples")); + assertEquals("UNKNOWN", countryInference("UNKNOWN", "Università del Lavoro")); + } + + @Test + public void cityInferenceTest() { + assertEquals("universita city::3181928", cityInference("Università di Bologna")); + assertEquals("university city::3170647", cityInference("University of Pisa")); + assertEquals("universita", cityInference("Università del lavoro")); + assertEquals("universita city::3173331 city::3169522", cityInference("Università di Modena e Reggio Emilia")); + } + + @Test + public void keywordInferenceTest() { + assertEquals("key::41 turin", keywordInference("Polytechnic University of Turin")); + assertEquals("key::41 torino", keywordInference("POLITECNICO DI TORINO")); + assertEquals( + "key::1 key::60 key::81 milano bergamo", + keywordInference("Universita farmaceutica culturale di milano bergamo")); + assertEquals("key::1 key::1 milano milano", keywordInference("universita universita milano milano")); + assertEquals( + "key::10 kapodistriako panepistemio athenon", + keywordInference("Εθνικό και Καποδιστριακό Πανεπιστήμιο Αθηνών")); + } + + @Test + public void cityKeywordInferenceTest() { + assertEquals("key::41 city::3165524", cityKeywordInference("Polytechnic University of Turin")); + assertEquals("key::41 city::3165524", cityKeywordInference("POLITECNICO DI TORINO")); + assertEquals( + "key::1 key::60 key::81 city::3173435 city::3182164", + cityKeywordInference("Universita farmaceutica culturale di milano bergamo")); + assertEquals( + "key::1 key::1 city::3173435 city::3173435", cityKeywordInference("universita universita milano milano")); + assertEquals( + "key::10 kapodistriako panepistemio city::264371", + cityKeywordInference("Εθνικό και Καποδιστριακό Πανεπιστήμιο Αθηνών")); + } + } diff --git a/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java b/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java index 8e72f4efc..c008902c4 100644 --- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java +++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/comparators/ComparatorTest.java @@ -35,6 +35,7 @@ public class ComparatorTest extends AbstractPaceTest { params.put("name_th", "0.95"); params.put("jpath_value", "$.value"); params.put("jpath_classid", "$.qualifier.classid"); + params.put("codeRegex", "key::\\d+"); } @Test @@ -44,52 +45,23 @@ public class ComparatorTest extends AbstractPaceTest { } @Test - public void cityMatchTest() { - final CityMatch cityMatch = new CityMatch(params); + public void codeMatchTest() { + CodeMatch codeMatch = new CodeMatch(params); - // both names with no cities - assertEquals(1.0, cityMatch.distance("Università", "Centro di ricerca", conf)); + // both names with no codes + assertEquals(1.0, codeMatch.distance("testing1", "testing2", conf)); - // one of the two names with no cities - assertEquals(-1.0, cityMatch.distance("Università di Bologna", "Centro di ricerca", conf)); + // one of the two names with no codes + assertEquals(-1.0, codeMatch.distance("testing1 key::1", "testing", conf)); - // both names with cities (same) - assertEquals(1.0, cityMatch.distance("Universita di Bologna", "Biblioteca di Bologna", conf)); + // both names with codes (same) + assertEquals(1.0, codeMatch.distance("testing1 key::1", "testing2 key::1", conf)); - // both names with cities (different) - assertEquals(0.0, cityMatch.distance("Universita di Bologna", "Universita di Torino", conf)); - assertEquals(0.0, cityMatch.distance("Franklin College", "Concordia College", conf)); + // both names with codes (different) + assertEquals(0.0, codeMatch.distance("testing1 key::1", "testing2 key::2", conf)); - // particular cases - assertEquals(1.0, cityMatch.distance("Free University of Bozen-Bolzano", "Università di Bolzano", conf)); - assertEquals( - 1.0, - cityMatch - .distance( - "Politechniki Warszawskiej (Warsaw University of Technology)", "Warsaw University of Technology", - conf)); - - // failing becasuse 'Allen' is a transliterrated greek stopword - // assertEquals(-1.0, cityMatch.distance("Allen (United States)", "United States Military Academy", conf)); - assertEquals(-1.0, cityMatch.distance("Washington (United States)", "United States Military Academy", conf)); - } - - @Test - public void keywordMatchTest() { - params.put("threshold", "0.5"); - - final KeywordMatch keywordMatch = new KeywordMatch(params); - - assertEquals( - 0.5, keywordMatch.distance("Biblioteca dell'Universita di Bologna", "Università di Bologna", conf)); - assertEquals(1.0, keywordMatch.distance("Universita degli studi di Pisa", "Universita di Pisa", conf)); - assertEquals(1.0, keywordMatch.distance("Polytechnic University of Turin", "POLITECNICO DI TORINO", conf)); - assertEquals(1.0, keywordMatch.distance("Istanbul Commerce University", "İstanbul Ticarət Universiteti", conf)); - assertEquals(1.0, keywordMatch.distance("Franklin College", "Concordia College", conf)); - assertEquals(2.0 / 3.0, keywordMatch.distance("University of Georgia", "Georgia State University", conf)); - assertEquals(0.5, keywordMatch.distance("University College London", "University of London", conf)); - assertEquals(0.5, keywordMatch.distance("Washington State University", "University of Washington", conf)); - assertEquals(-1.0, keywordMatch.distance("Allen (United States)", "United States Military Academy", conf)); + // both names with codes (1 same, 1 different) + assertEquals(0.5, codeMatch.distance("key::1 key::2 testing1", "key::1 testing", conf)); } @@ -155,15 +127,15 @@ public class ComparatorTest extends AbstractPaceTest { } @Test - public void jaroWinklerNormalizedNameTest() { + public void jaroWinklerLegalnameTest() { - final JaroWinklerNormalizedName jaroWinklerNormalizedName = new JaroWinklerNormalizedName(params); + final JaroWinklerLegalname jaroWinklerLegalname = new JaroWinklerLegalname(params); - double result = jaroWinklerNormalizedName - .distance("AT&T (United States)", "United States Military Academy", conf); + double result = jaroWinklerLegalname + .distance("AT&T (United States)", "United States key::2 key::1", conf); System.out.println("result = " + result); - result = jaroWinklerNormalizedName.distance("NOAA - Servicio Meteorol\\u00f3gico Nacional", "NOAA - NWS", conf); + result = jaroWinklerLegalname.distance("NOAA - Servicio Meteorol\\u00f3gico Nacional", "NOAA - NWS", conf); System.out.println("result = " + result); } @@ -344,13 +316,13 @@ public class ComparatorTest extends AbstractPaceTest { double result = countryMatch.distance("UNKNOWN", "UNKNOWN", conf); assertEquals(-1.0, result); - result = countryMatch.distance("CHILE", "UNKNOWN", conf); + result = countryMatch.distance("CL", "UNKNOWN", conf); assertEquals(-1.0, result); - result = countryMatch.distance("CHILE", "ITALY", conf); + result = countryMatch.distance("CL", "IT", conf); assertEquals(0.0, result); - result = countryMatch.distance("CHILE", "CHILE", conf); + result = countryMatch.distance("CL", "CL", conf); assertEquals(1.0, result); } diff --git a/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java b/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java index 93db552c1..be5c1ebb9 100644 --- a/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java +++ b/dhp-pace-core/src/test/java/eu/dnetlib/pace/util/UtilTest.java @@ -11,6 +11,7 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import eu.dnetlib.pace.model.Person; +import jdk.nashorn.internal.ir.annotations.Ignore; public class UtilTest { diff --git a/dhp-shade-package/pom.xml b/dhp-shade-package/pom.xml new file mode 100644 index 000000000..d8e17ed46 --- /dev/null +++ b/dhp-shade-package/pom.xml @@ -0,0 +1,169 @@ + + + 4.0.0 + + eu.dnetlib.dhp + dhp + 1.2.5-SNAPSHOT + ../pom.xml + + + + dhp-shade-package + jar + + + + DHPSite + ${dhp.site.stage.path}/dhp-common + + + + This module create a jar of all module dependencies + + + + + + eu.dnetlib.dhp + dhp-actionmanager + ${project.version} + + + + + + + + + + + + + + + + + + + + + + + + + + + + eu.dnetlib.dhp + dhp-graph-mapper + ${project.version} + + + eu.dnetlib.dhp + dhp-graph-provision + ${project.version} + + + eu.dnetlib.dhp + dhp-impact-indicators + ${project.version} + + + eu.dnetlib.dhp + dhp-stats-actionsets + ${project.version} + + + eu.dnetlib.dhp + dhp-stats-hist-snaps + ${project.version} + + + eu.dnetlib.dhp + dhp-stats-monitor-irish + ${project.version} + + + eu.dnetlib.dhp + dhp-stats-promote + ${project.version} + + + eu.dnetlib.dhp + dhp-stats-update + ${project.version} + + + eu.dnetlib.dhp + dhp-swh + ${project.version} + + + eu.dnetlib.dhp + dhp-usage-raw-data-update + ${project.version} + + + eu.dnetlib.dhp + dhp-usage-stats-build + ${project.version} + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + package + + shade + + + + + eu.dnetlib.dhp.oa.dedup.SparkCreateSimRels + + + + + META-INF/cxf/bus-extensions.txt + + + + + *:* + + META-INF/maven/** + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + com + repackaged.com.google.common + + com.google.common.** + + + + + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-actionmanager/pom.xml b/dhp-workflows/dhp-actionmanager/pom.xml index ce13502b6..e0bba29d2 100644 --- a/dhp-workflows/dhp-actionmanager/pom.xml +++ b/dhp-workflows/dhp-actionmanager/pom.xml @@ -51,48 +51,5 @@ hadoop-distcp - - eu.dnetlib - dnet-actionmanager-api - - - eu.dnetlib - dnet-actionmanager-common - - - eu.dnetlib - dnet-openaireplus-mapping-utils - - - saxonica - saxon - - - saxonica - saxon-dom - - - jgrapht - jgrapht - - - net.sf.ehcache - ehcache - - - org.springframework - spring-test - - - org.apache.* - * - - - apache - * - - - - diff --git a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java index 088e618c7..7ae2901e3 100644 --- a/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java +++ b/dhp-workflows/dhp-actionmanager/src/main/java/eu/dnetlib/dhp/actionmanager/ISClient.java @@ -4,7 +4,6 @@ package eu.dnetlib.dhp.actionmanager; import java.io.Serializable; import java.io.StringReader; import java.util.List; -import java.util.NoSuchElementException; import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; @@ -22,7 +21,6 @@ import com.google.common.base.Splitter; import com.google.common.collect.Iterables; import com.google.common.collect.Sets; -import eu.dnetlib.actionmanager.rmi.ActionManagerException; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -65,7 +63,7 @@ public class ISClient implements Serializable { .map(t -> buildDirectory(basePath, t)) .collect(Collectors.toList())) .orElseThrow(() -> new IllegalStateException("empty set list")); - } catch (ActionManagerException | ISLookUpException e) { + } catch (ISLookUpException e) { throw new IllegalStateException("unable to query ActionSets info from the IS"); } } @@ -89,31 +87,18 @@ public class ISClient implements Serializable { return Joiner.on("/").join(basePath, t.getMiddle(), t.getRight()); } - private String getBasePathHDFS(ISLookUpService isLookup) throws ActionManagerException { + private String getBasePathHDFS(ISLookUpService isLookup) throws ISLookUpException { return queryServiceProperty(isLookup, "basePath"); } private String queryServiceProperty(ISLookUpService isLookup, final String propertyName) - throws ActionManagerException { + throws ISLookUpException { final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='" + propertyName + "']/@value/string()"; log.debug("quering for service property: {}", q); - try { - final List value = isLookup.quickSearchProfile(q); - return Iterables.getOnlyElement(value); - } catch (ISLookUpException e) { - String msg = "Error accessing service profile, using query: " + q; - log.error(msg, e); - throw new ActionManagerException(msg, e); - } catch (NoSuchElementException e) { - String msg = "missing service property: " + propertyName; - log.error(msg, e); - throw new ActionManagerException(msg, e); - } catch (IllegalArgumentException e) { - String msg = "found more than one service property: " + propertyName; - log.error(msg, e); - throw new ActionManagerException(msg, e); - } + + final List value = isLookup.quickSearchProfile(q); + return Iterables.getOnlyElement(value); } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java index 70ca1576c..88d62ea33 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipaffiliations/PrepareAffiliationRelations.java @@ -10,7 +10,6 @@ import java.util.List; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.BZip2Codec; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java index 040c89782..c1e0c4d68 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/bipfinder/SparkAtomicActionScoreJob.java @@ -10,6 +10,7 @@ import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.BZip2Codec; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.spark.SparkConf; @@ -83,7 +84,7 @@ public class SparkAtomicActionScoreJob implements Serializable { resultsRDD .union(projectsRDD) .saveAsHadoopFile( - outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class); + outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class); }); } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java index a88607986..be22077c3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/PrepareSDGSparkJob.java @@ -6,26 +6,23 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; +import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.MapGroupsFunction; -import org.apache.spark.sql.Dataset; -import org.apache.spark.sql.Encoders; -import org.apache.spark.sql.SaveMode; -import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.*; +import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import eu.dnetlib.dhp.actionmanager.createunresolvedentities.model.SDGDataModel; import eu.dnetlib.dhp.application.ArgumentApplicationParser; -import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.StructuredProperty; import eu.dnetlib.dhp.schema.oaf.Subject; -import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import eu.dnetlib.dhp.utils.DHPUtils; public class PrepareSDGSparkJob implements Serializable { @@ -52,42 +49,91 @@ public class PrepareSDGSparkJob implements Serializable { final String outputPath = parser.get("outputPath"); log.info("outputPath: {}", outputPath); + final Boolean distributeDOI = Optional + .ofNullable(parser.get("distributeDoi")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("distribute doi {}", distributeDOI); + SparkConf conf = new SparkConf(); runWithSparkSession( conf, isSparkSessionManaged, spark -> { - doPrepare( - spark, - sourcePath, + if (distributeDOI) + doPrepare( + spark, + sourcePath, + + outputPath); + else + doPrepareoaid(spark, sourcePath, outputPath); - outputPath); }); } private static void doPrepare(SparkSession spark, String sourcePath, String outputPath) { - Dataset sdgDataset = readPath(spark, sourcePath, SDGDataModel.class); + Dataset sdgDataset = spark + .read() + .format("csv") + .option("sep", DEFAULT_DELIMITER) + .option("inferSchema", "true") + .option("header", "true") + .option("quotes", "\"") + .load(sourcePath); sdgDataset - .groupByKey((MapFunction) r -> r.getDoi().toLowerCase(), Encoders.STRING()) - .mapGroups((MapGroupsFunction) (k, it) -> { - Result r = new Result(); - r.setId(DHPUtils.generateUnresolvedIdentifier(k, DOI)); - SDGDataModel first = it.next(); - List sbjs = new ArrayList<>(); - sbjs.add(getSubject(first.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID)); - it - .forEachRemaining( - s -> sbjs - .add(getSubject(s.getSbj(), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID))); - r.setSubject(sbjs); - - return r; - }, Encoders.bean(Result.class)) + .groupByKey((MapFunction) v -> ((String) v.getAs("doi")).toLowerCase(), Encoders.STRING()) + .mapGroups( + (MapGroupsFunction) (k, + it) -> getResult( + DHPUtils + .generateUnresolvedIdentifier( + ModelSupport.entityIdPrefix.get(Result.class.getSimpleName().toLowerCase()) + "|" + k, + DOI), + it), + Encoders.bean(Result.class)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") .json(outputPath + "/sdg"); } + private static void doPrepareoaid(SparkSession spark, String sourcePath, String outputPath) { + Dataset sdgDataset = spark + .read() + .format("csv") + .option("sep", DEFAULT_DELIMITER) + .option("inferSchema", "true") + .option("header", "true") + .option("quotes", "\"") + .load(sourcePath); + ; + + sdgDataset + .groupByKey((MapFunction) r -> "50|" + ((String) r.getAs("oaid")), Encoders.STRING()) + .mapGroups( + (MapGroupsFunction) PrepareSDGSparkJob::getResult, Encoders.bean(Result.class)) + .write() + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .json(outputPath + "/sdg"); + } + + private static @NotNull Result getResult(String id, Iterator it) { + Result r = new Result(); + r.setId(id); + Row first = it.next(); + List sbjs = new ArrayList<>(); + sbjs.add(getSubject(first.getAs("sdg"), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID)); + it + .forEachRemaining( + s -> sbjs + .add(getSubject(s.getAs("sdg"), SDG_CLASS_ID, SDG_CLASS_NAME, UPDATE_SUBJECT_SDG_CLASS_ID))); + r.setSubject(sbjs); + + return r; + } + } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/fosnodoi/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/fosnodoi/CreateActionSetSparkJob.java index e86fccb84..fceed2008 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/fosnodoi/CreateActionSetSparkJob.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/fosnodoi/CreateActionSetSparkJob.java @@ -13,9 +13,6 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.function.FilterFunction; -import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Encoders; import org.apache.spark.sql.SparkSession; @@ -24,13 +21,9 @@ import org.slf4j.LoggerFactory; import com.fasterxml.jackson.databind.ObjectMapper; -import eu.dnetlib.dhp.actionmanager.opencitations.model.COCI; import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.schema.action.AtomicAction; -import eu.dnetlib.dhp.schema.common.ModelConstants; -import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.Result; -import eu.dnetlib.dhp.schema.oaf.utils.*; import scala.Tuple2; public class CreateActionSetSparkJob implements Serializable { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/CoAuthorshipIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/CoAuthorshipIterator.java new file mode 100644 index 000000000..76e4c4851 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/CoAuthorshipIterator.java @@ -0,0 +1,80 @@ + +package eu.dnetlib.dhp.actionmanager.personentity; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.Person; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; +import eu.dnetlib.dhp.utils.DHPUtils; + +public class CoAuthorshipIterator implements Iterator { + private int firstIndex; + private int secondIndex; + private boolean firstRelation; + private List authors; + private static final String PERSON_PREFIX = ModelSupport.getIdPrefix(Person.class) + "|orcid_______::"; + private static final String OPENAIRE_PREFIX = "openaire____"; + private static final String SEPARATOR = "::"; + private static final String ORCID_KEY = "10|" + OPENAIRE_PREFIX + SEPARATOR + + DHPUtils.md5(ModelConstants.ORCID.toLowerCase()); + public static final String ORCID_AUTHORS_CLASSID = "sysimport:crosswalk:orcid"; + public static final String ORCID_AUTHORS_CLASSNAME = "Imported from ORCID"; + + @Override + public boolean hasNext() { + return firstIndex < authors.size() - 1; + } + + @Override + public Relation next() { + Relation rel = null; + if (firstRelation) { + rel = getRelation(authors.get(firstIndex), authors.get(secondIndex)); + firstRelation = Boolean.FALSE; + } else { + rel = getRelation(authors.get(secondIndex), authors.get(firstIndex)); + firstRelation = Boolean.TRUE; + secondIndex += 1; + if (secondIndex >= authors.size()) { + firstIndex += 1; + secondIndex = firstIndex + 1; + } + } + + return rel; + } + + public CoAuthorshipIterator(List authors) { + this.authors = authors; + this.firstIndex = 0; + this.secondIndex = 1; + this.firstRelation = Boolean.TRUE; + + } + + private Relation getRelation(String orcid1, String orcid2) { + String source = PERSON_PREFIX + IdentifierFactory.md5(orcid1); + String target = PERSON_PREFIX + IdentifierFactory.md5(orcid2); + return OafMapperUtils + .getRelation( + source, target, ModelConstants.PERSON_PERSON_RELTYPE, + ModelConstants.PERSON_PERSON_SUBRELTYPE, + ModelConstants.PERSON_PERSON_HASCOAUTHORED, + Arrays.asList(OafMapperUtils.keyValue(ORCID_KEY, ModelConstants.ORCID_DS)), + OafMapperUtils + .dataInfo( + false, null, false, false, + OafMapperUtils + .qualifier( + ORCID_AUTHORS_CLASSID, ORCID_AUTHORS_CLASSNAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS), + "0.91"), + null); + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/Coauthors.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/Coauthors.java new file mode 100644 index 000000000..17f46d5c7 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/Coauthors.java @@ -0,0 +1,20 @@ + +package eu.dnetlib.dhp.actionmanager.personentity; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +import eu.dnetlib.dhp.schema.oaf.Relation; + +public class Coauthors implements Serializable { + private List coauthors; + + public List getCoauthors() { + return coauthors; + } + + public void setCoauthors(List coauthors) { + this.coauthors = coauthors; + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/Couples.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/Couples.java new file mode 100644 index 000000000..d052b52b6 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/Couples.java @@ -0,0 +1,40 @@ + +package eu.dnetlib.dhp.actionmanager.personentity; + +import java.io.Serializable; + +import eu.dnetlib.dhp.schema.oaf.Person; +import eu.dnetlib.dhp.schema.oaf.Relation; +import scala.Tuple2; + +public class Couples implements Serializable { + Person p; + Relation r; + + public Couples() { + + } + + public Person getP() { + return p; + } + + public void setP(Person p) { + this.p = p; + } + + public Relation getR() { + return r; + } + + public void setR(Relation r) { + this.r = r; + } + + public static Couples newInstance(Tuple2 couple) { + Couples c = new Couples(); + c.p = couple._1(); + c.r = couple._2(); + return c; + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java new file mode 100644 index 000000000..d381ed176 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/ExtractPerson.java @@ -0,0 +1,437 @@ + +package eu.dnetlib.dhp.actionmanager.personentity; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static org.apache.spark.sql.functions.*; + +import java.io.IOException; +import java.io.Serializable; +import java.util.*; +import java.util.stream.Collectors; + +import org.apache.commons.cli.ParseException; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.BZip2Codec; +import org.apache.hadoop.mapred.SequenceFileOutputFormat; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.*; +import org.apache.spark.sql.*; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.spark_project.jetty.util.StringUtil; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.collection.orcid.model.Author; +import eu.dnetlib.dhp.collection.orcid.model.Employment; +import eu.dnetlib.dhp.collection.orcid.model.Work; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.common.ModelSupport; +import eu.dnetlib.dhp.schema.oaf.KeyValue; +import eu.dnetlib.dhp.schema.oaf.Person; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; +import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; +import eu.dnetlib.dhp.utils.DHPUtils; +import scala.Tuple2; + +public class ExtractPerson implements Serializable { + private static final Logger log = LoggerFactory.getLogger(ExtractPerson.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final String OPENAIRE_PREFIX = "openaire____"; + private static final String SEPARATOR = "::"; + private static final String orcidKey = "10|" + OPENAIRE_PREFIX + SEPARATOR + + DHPUtils.md5(ModelConstants.ORCID.toLowerCase()); + + private static final String DOI_PREFIX = "50|doi_________::"; + + private static final String PMID_PREFIX = "50|pmid________::"; + private static final String ARXIV_PREFIX = "50|arXiv_______::"; + + private static final String PMCID_PREFIX = "50|pmcid_______::"; + private static final String ROR_PREFIX = "20|ror_________::"; + private static final String PERSON_PREFIX = ModelSupport.getIdPrefix(Person.class) + "|orcid_______"; + public static final String ORCID_AUTHORS_CLASSID = "sysimport:crosswalk:orcid"; + public static final String ORCID_AUTHORS_CLASSNAME = "Imported from ORCID"; + + public static void main(final String[] args) throws IOException, ParseException { + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + ExtractPerson.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/personentity/as_parameters.json")))); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("inputPath"); + log.info("inputPath {}", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}", outputPath); + + final String workingDir = parser.get("workingDir"); + log.info("workingDir {}", workingDir); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration()); + createActionSet(spark, inputPath, outputPath, workingDir); + }); + + } + + private static void createActionSet(SparkSession spark, String inputPath, String outputPath, String workingDir) { + + Dataset authors = spark + .read() + .parquet(inputPath + "Authors") + .as(Encoders.bean(Author.class)); + + Dataset works = spark + .read() + .parquet(inputPath + "Works") + .as(Encoders.bean(Work.class)) + .filter( + (FilterFunction) w -> Optional.ofNullable(w.getPids()).isPresent() && + w + .getPids() + .stream() + .anyMatch( + p -> p.getSchema().equalsIgnoreCase("doi") || + p.getSchema().equalsIgnoreCase("pmc") || + p.getSchema().equalsIgnoreCase("pmid") || + p.getSchema().equalsIgnoreCase("arxiv"))); + + Dataset employmentDataset = spark + .read() + .parquet(inputPath + "Employments") + .as(Encoders.bean(Employment.class)); + + Dataset peopleToMap = authors + .joinWith(works, authors.col("orcid").equalTo(works.col("orcid"))) + .map((MapFunction, Author>) t2 -> t2._1(), Encoders.bean(Author.class)) + .groupByKey((MapFunction) a -> a.getOrcid(), Encoders.STRING()) + .mapGroups((MapGroupsFunction) (k, it) -> it.next(), Encoders.bean(Author.class)); + + Dataset employment = employmentDataset + .joinWith(peopleToMap, employmentDataset.col("orcid").equalTo(peopleToMap.col("orcid"))) + .map((MapFunction, Employment>) t2 -> t2._1(), Encoders.bean(Employment.class)); + + Dataset people; + peopleToMap.map((MapFunction) op -> { + Person person = new Person(); + person.setId(DHPUtils.generateIdentifier(op.getOrcid(), PERSON_PREFIX)); + person + .setBiography( + Optional + .ofNullable(op.getBiography()) + + .orElse("")); + KeyValue kv = OafMapperUtils.keyValue(orcidKey, ModelConstants.ORCID_DS); + kv.setDataInfo(null); + person.setCollectedfrom(Arrays.asList(kv)); + person + .setAlternativeNames( + Optional + .ofNullable(op.getOtherNames()) + + .orElse(new ArrayList<>())); + person + .setFamilyName( + Optional + .ofNullable(op.getFamilyName()) + + .orElse("")); + person + .setGivenName( + Optional + .ofNullable(op.getGivenName()) + + .orElse("")); + person + .setPid( + Optional + .ofNullable(op.getOtherPids()) + .map( + v -> v + .stream() + .map( + p -> OafMapperUtils + .structuredProperty( + p.getValue(), p.getSchema(), p.getSchema(), ModelConstants.DNET_PID_TYPES, + ModelConstants.DNET_PID_TYPES, null)) + .collect(Collectors.toList())) + .orElse(new ArrayList<>())); + person + .getPid() + .add( + OafMapperUtils + .structuredProperty( + op.getOrcid(), ModelConstants.ORCID, ModelConstants.ORCID_CLASSNAME, + ModelConstants.DNET_PID_TYPES, ModelConstants.DNET_PID_TYPES, null)); + person.setDateofcollection(op.getLastModifiedDate()); + person.setOriginalId(Arrays.asList(op.getOrcid())); + return person; + }, Encoders.bean(Person.class)) + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(workingDir + "/people"); + + works + .flatMap( + (FlatMapFunction) ExtractPerson::getAuthorshipRelationIterator, + Encoders.bean(Relation.class)) + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(workingDir + "/authorship"); + + Dataset coauthorship = works + .flatMap((FlatMapFunction>) w -> { + List> lista = new ArrayList<>(); + w.getPids().stream().forEach(p -> { + if (p.getSchema().equalsIgnoreCase("doi") || p.getSchema().equalsIgnoreCase("pmc") + || p.getSchema().equalsIgnoreCase("pmid") || p.getSchema().equalsIgnoreCase("arxiv")) + lista.add(new Tuple2<>(p.getValue(), w.getOrcid())); + }); + return lista.iterator(); + }, Encoders.tuple(Encoders.STRING(), Encoders.STRING())) + .groupByKey((MapFunction, String>) Tuple2::_1, Encoders.STRING()) + .mapGroups( + (MapGroupsFunction, Coauthors>) (k, it) -> extractCoAuthors(it), + Encoders.bean(Coauthors.class)) + .flatMap( + (FlatMapFunction) c -> new CoAuthorshipIterator(c.getCoauthors()), + Encoders.bean(Relation.class)) + .groupByKey((MapFunction) r -> r.getSource() + r.getTarget(), Encoders.STRING()) + .mapGroups( + (MapGroupsFunction) (k, it) -> it.next(), Encoders.bean(Relation.class)); + + coauthorship + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(workingDir + "/coauthorship"); + + employment + .filter((FilterFunction) e -> Optional.ofNullable(e.getAffiliationId()).isPresent()) + .filter((FilterFunction) e -> e.getAffiliationId().getSchema().equalsIgnoreCase("ror")) + .map( + (MapFunction) ExtractPerson::getAffiliationRelation, + Encoders.bean(Relation.class)) + .write() + .option("compression", "gzip") + .mode(SaveMode.Overwrite) + .json(workingDir + "/affiliation"); + + people = spark + .read() + .textFile(workingDir + "/people") + .map( + (MapFunction) value -> OBJECT_MAPPER + .readValue(value, Person.class), + Encoders.bean(Person.class)); + + people.show(false); + people + .toJavaRDD() + .map(p -> new AtomicAction(p.getClass(), p)) + .union( + getRelations(spark, workingDir + "/authorship").toJavaRDD().map(r -> new AtomicAction(r.getClass(), r))) + .union( + getRelations(spark, workingDir + "/coauthorship") + .toJavaRDD() + .map(r -> new AtomicAction(r.getClass(), r))) + .union( + getRelations(spark, workingDir + "/affiliation") + .toJavaRDD() + .map(r -> new AtomicAction(r.getClass(), r))) + .mapToPair( + aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()), + new Text(OBJECT_MAPPER.writeValueAsString(aa)))) + .saveAsHadoopFile( + outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class); + } + + private static Dataset getRelations(SparkSession spark, String path) { + return spark + .read() + .textFile(path) + .map( + (MapFunction) value -> OBJECT_MAPPER + .readValue(value, Relation.class), + Encoders.bean(Relation.class));// spark.read().json(path).as(Encoders.bean(Relation.class)); + } + + private static Coauthors extractCoAuthors(Iterator> it) { + Coauthors coauth = new Coauthors(); + List coauthors = new ArrayList<>(); + while (it.hasNext()) + coauthors.add(it.next()._2()); + coauth.setCoauthors(coauthors); + + return coauth; + } + + private static Relation getAffiliationRelation(Employment row) { + String source = PERSON_PREFIX + IdentifierFactory.md5(row.getOrcid()); + String target = ROR_PREFIX + + IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", row.getAffiliationId().getValue())); + List properties = new ArrayList<>(); + + Relation relation = OafMapperUtils + .getRelation( + source, target, ModelConstants.ORG_PERSON_RELTYPE, ModelConstants.ORG_PERSON_SUBRELTYPE, + ModelConstants.ORG_PERSON_PARTICIPATES, + Arrays.asList(OafMapperUtils.keyValue(orcidKey, ModelConstants.ORCID_DS)), + OafMapperUtils + .dataInfo( + false, null, false, false, + OafMapperUtils + .qualifier( + ORCID_AUTHORS_CLASSID, ORCID_AUTHORS_CLASSNAME, ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + "0.91"), + null); + + if (Optional.ofNullable(row.getStartDate()).isPresent() && StringUtil.isNotBlank(row.getStartDate())) { + KeyValue kv = new KeyValue(); + kv.setKey("startDate"); + kv.setValue(row.getStartDate()); + properties.add(kv); + } + if (Optional.ofNullable(row.getEndDate()).isPresent() && StringUtil.isNotBlank(row.getEndDate())) { + KeyValue kv = new KeyValue(); + kv.setKey("endDate"); + kv.setValue(row.getEndDate()); + properties.add(kv); + } + + if (properties.size() > 0) + relation.setProperties(properties); + return relation; + + } + + private static Collection getCoAuthorshipRelations(String orcid1, String orcid2) { + String source = PERSON_PREFIX + "::" + IdentifierFactory.md5(orcid1); + String target = PERSON_PREFIX + "::" + IdentifierFactory.md5(orcid2); + + return Arrays + .asList( + OafMapperUtils + .getRelation( + source, target, ModelConstants.PERSON_PERSON_RELTYPE, + ModelConstants.PERSON_PERSON_SUBRELTYPE, + ModelConstants.PERSON_PERSON_HASCOAUTHORED, + Arrays.asList(OafMapperUtils.keyValue(orcidKey, ModelConstants.ORCID_DS)), + OafMapperUtils + .dataInfo( + false, null, false, false, + OafMapperUtils + .qualifier( + ORCID_AUTHORS_CLASSID, ORCID_AUTHORS_CLASSNAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS), + "0.91"), + null), + OafMapperUtils + .getRelation( + target, source, ModelConstants.PERSON_PERSON_RELTYPE, + ModelConstants.PERSON_PERSON_SUBRELTYPE, + ModelConstants.PERSON_PERSON_HASCOAUTHORED, + Arrays.asList(OafMapperUtils.keyValue(orcidKey, ModelConstants.ORCID_DS)), + OafMapperUtils + .dataInfo( + false, null, false, false, + OafMapperUtils + .qualifier( + ORCID_AUTHORS_CLASSID, ORCID_AUTHORS_CLASSNAME, + ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS), + "0.91"), + null)); + + } + + private static @NotNull Iterator getAuthorshipRelationIterator(Work w) { + + if (Optional.ofNullable(w.getPids()).isPresent()) + return w + .getPids() + .stream() + .map(pid -> getRelation(w.getOrcid(), pid)) + .filter(Objects::nonNull) + .collect(Collectors.toList()) + .iterator(); + List ret = new ArrayList<>(); + return ret.iterator(); + } + + private static Relation getRelation(String orcid, eu.dnetlib.dhp.collection.orcid.model.Pid pid) { + String target; + String source = PERSON_PREFIX + "::" + IdentifierFactory.md5(orcid); + switch (pid.getSchema()) { + case "doi": + target = DOI_PREFIX + + IdentifierFactory + .md5(PidCleaner.normalizePidValue(PidType.doi.toString(), pid.getValue())); + break; + case "pmid": + target = PMID_PREFIX + + IdentifierFactory + .md5(PidCleaner.normalizePidValue(PidType.pmid.toString(), pid.getValue())); + break; + case "arxiv": + target = ARXIV_PREFIX + + IdentifierFactory + .md5(PidCleaner.normalizePidValue(PidType.arXiv.toString(), pid.getValue())); + break; + case "pmcid": + target = PMCID_PREFIX + + IdentifierFactory + .md5(PidCleaner.normalizePidValue(PidType.pmc.toString(), pid.getValue())); + break; + + default: + return null; + } + + return OafMapperUtils + .getRelation( + source, target, ModelConstants.RESULT_PERSON_RELTYPE, + ModelConstants.RESULT_PERSON_SUBRELTYPE, + ModelConstants.RESULT_PERSON_HASAUTHORED, + Arrays.asList(OafMapperUtils.keyValue(orcidKey, ModelConstants.ORCID_DS)), + OafMapperUtils + .dataInfo( + false, null, false, false, + OafMapperUtils + .qualifier( + ORCID_AUTHORS_CLASSID, ORCID_AUTHORS_CLASSNAME, ModelConstants.DNET_PROVENANCE_ACTIONS, + ModelConstants.DNET_PROVENANCE_ACTIONS), + "0.91"), + null); + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/WorkList.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/WorkList.java new file mode 100644 index 000000000..92842bfcf --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/personentity/WorkList.java @@ -0,0 +1,25 @@ + +package eu.dnetlib.dhp.actionmanager.personentity; + +import java.io.Serializable; +import java.util.ArrayList; + +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; + +import eu.dnetlib.dhp.collection.orcid.model.Work; + +public class WorkList implements Serializable { + private ArrayList workArrayList; + + public ArrayList getWorkArrayList() { + return workArrayList; + } + + public void setWorkArrayList(ArrayList workArrayList) { + this.workArrayList = workArrayList; + } + + public WorkList() { + workArrayList = new ArrayList<>(); + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/sdgnodoi/CreateActionSetSparkJob.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/sdgnodoi/CreateActionSetSparkJob.java new file mode 100644 index 000000000..0bc3b524b --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/sdgnodoi/CreateActionSetSparkJob.java @@ -0,0 +1,91 @@ + +package eu.dnetlib.dhp.actionmanager.sdgnodoi; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; + +import java.io.IOException; +import java.io.Serializable; +import java.util.Objects; +import java.util.Optional; + +import org.apache.commons.cli.ParseException; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.Hdfs; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.mapred.SequenceFileOutputFormat; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.SparkSession; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.oaf.Result; +import scala.Tuple2; + +public class CreateActionSetSparkJob implements Serializable { + + private static final Logger log = LoggerFactory.getLogger(CreateActionSetSparkJob.class); + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(final String[] args) throws IOException, ParseException { + + final ArgumentApplicationParser parser = new ArgumentApplicationParser( + IOUtils + .toString( + Objects + .requireNonNull( + CreateActionSetSparkJob.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/fosnodoi/as_parameters.json")))); + + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + final String inputPath = parser.get("sourcePath"); + log.info("inputPath {}", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath {}", outputPath); + + SparkConf conf = new SparkConf(); + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + HdfsSupport.remove(outputPath, spark.sparkContext().hadoopConfiguration()); + createActionSet(spark, inputPath, outputPath); + }); + + } + + private static void createActionSet(SparkSession spark, String inputPath, String outputPath) { + spark + .read() + .textFile(inputPath) + .map( + (MapFunction) value -> OBJECT_MAPPER.readValue(value, Result.class), + Encoders.bean(Result.class)) + .toJavaRDD() + .map(p -> new AtomicAction(p.getClass(), p)) + .mapToPair( + aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()), + new Text(OBJECT_MAPPER.writeValueAsString(aa)))) + .saveAsHadoopFile( + outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/webcrawl/CreateActionSetFromWebEntries.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/webcrawl/CreateActionSetFromWebEntries.java index e163c8556..b5aed6ea2 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/webcrawl/CreateActionSetFromWebEntries.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/webcrawl/CreateActionSetFromWebEntries.java @@ -5,11 +5,10 @@ import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; import java.io.Serializable; import java.util.*; -import java.util.stream.Collectors; import org.apache.commons.io.IOUtils; import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.io.compress.BZip2Codec; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.FilterFunction; @@ -113,7 +112,7 @@ public class CreateActionSetFromWebEntries implements Serializable { .mapToPair( aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()), new Text(OBJECT_MAPPER.writeValueAsString(aa)))) - .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, GzipCodec.class); + .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class); } @@ -153,11 +152,40 @@ public class CreateActionSetFromWebEntries implements Serializable { .select("OpenAlexId"); } + private static List createAffiliationRelationPairPMCID(String pmcid, String ror) { + if (pmcid == null) + return new ArrayList<>(); + + return createAffiliatioRelationPair( + PMCID_PREFIX + + IdentifierFactory + .md5(PidCleaner.normalizePidValue(PidType.pmc.toString(), removeResolver("PMC", pmcid))), + ror); + } + + private static List createAffiliationRelationPairPMID(String pmid, String ror) { + if (pmid == null) + return new ArrayList<>(); + + return createAffiliatioRelationPair( + PMID_PREFIX + + IdentifierFactory + .md5(PidCleaner.normalizePidValue(PidType.pmid.toString(), removeResolver("PMID", pmid))), + ror); + } + private static String removeResolver(String pidType, String pid) { - if (pidType.equals("DOI")) { - return pid.substring(16); + switch (pidType) { + case "PMID": + return pid.substring(33); + case "PMC": + return "PMC" + pid.substring(43); + case "DOI": + return pid.substring(16); } - throw new IllegalArgumentException("DOI is the only supported PID type"); + + throw new RuntimeException(); + } private static List createAffiliationRelationPairDOI(String doi, String ror) { diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/webcrawl/RemoveRelationFromActionSet.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/webcrawl/RemoveRelationFromActionSet.java new file mode 100644 index 000000000..08d543218 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/actionmanager/webcrawl/RemoveRelationFromActionSet.java @@ -0,0 +1,158 @@ + +package eu.dnetlib.dhp.actionmanager.webcrawl; + +import static eu.dnetlib.dhp.common.SparkSessionSupport.runWithSparkSession; +import static org.apache.spark.sql.functions.*; + +import java.io.File; +import java.io.Serializable; +import java.util.Arrays; +import java.util.Optional; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.commons.io.filefilter.DirectoryFileFilter; +import org.apache.commons.io.filefilter.FileFileFilter; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.BZip2Codec; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.mapred.SequenceFileOutputFormat; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.sql.*; +import org.apache.spark.sql.types.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.application.ArgumentApplicationParser; +import eu.dnetlib.dhp.common.HdfsSupport; +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import scala.Tuple2; + +public class RemoveRelationFromActionSet + implements Serializable { + private static final Logger log = LoggerFactory.getLogger(CreateActionSetFromWebEntries.class); + + private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final StructType KV_SCHEMA = StructType$.MODULE$ + .apply( + Arrays + .asList( + StructField$.MODULE$.apply("key", DataTypes.StringType, false, Metadata.empty()), + StructField$.MODULE$.apply("value", DataTypes.StringType, false, Metadata.empty()))); + + private static final StructType ATOMIC_ACTION_SCHEMA = StructType$.MODULE$ + .apply( + Arrays + .asList( + StructField$.MODULE$.apply("clazz", DataTypes.StringType, false, Metadata.empty()), + StructField$.MODULE$ + .apply( + "payload", DataTypes.StringType, false, Metadata.empty()))); + + public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + public static void main(String[] args) throws Exception { + String jsonConfiguration = IOUtils + .toString( + CreateActionSetFromWebEntries.class + .getResourceAsStream( + "/eu/dnetlib/dhp/actionmanager/webcrawl/as_parameters.json")); + + final ArgumentApplicationParser parser = new ArgumentApplicationParser(jsonConfiguration); + parser.parseArgument(args); + + Boolean isSparkSessionManaged = Optional + .ofNullable(parser.get("isSparkSessionManaged")) + .map(Boolean::valueOf) + .orElse(Boolean.TRUE); + + log.info("isSparkSessionManaged: {}", isSparkSessionManaged); + + // the actionSet path + final String inputPath = parser.get("sourcePath"); + log.info("inputPath: {}", inputPath); + + final String outputPath = parser.get("outputPath"); + log.info("outputPath: {}", outputPath); + + final String blackListInputPath = parser.get("blackListPath"); + log.info("blackListInputPath: {}", blackListInputPath); + + SparkConf conf = new SparkConf(); + + runWithSparkSession( + conf, + isSparkSessionManaged, + spark -> { + + removeFromActionSet(spark, inputPath, outputPath, blackListInputPath); + + }); + } + + private static void removeFromActionSet(SparkSession spark, String inputPath, String outputPath, + String blackListInputPath) { + // read the blacklist + Dataset blackList = readBlackList(spark, blackListInputPath) + .map( + (MapFunction) r -> IdentifierFactory + .idFromPid("50", "doi", ((String) r.getAs("doi")).substring(16), true), + Encoders.STRING()); + + // read the old actionset and get the relations in the payload + JavaPairRDD seq = JavaSparkContext + .fromSparkContext(spark.sparkContext()) + .sequenceFile(inputPath, Text.class, Text.class); + + JavaRDD rdd = seq + .map(x -> RowFactory.create(x._1().toString(), x._2().toString())); + + Dataset actionSet = spark + .createDataFrame(rdd, KV_SCHEMA) + .withColumn("atomic_action", from_json(col("value"), ATOMIC_ACTION_SCHEMA)) + .select(expr("atomic_action.*")); + + Dataset relation = actionSet + .map( + (MapFunction) r -> MAPPER.readValue((String) r.getAs("payload"), Relation.class), + Encoders.bean(Relation.class)); + + // select only the relation not matching any pid in the blacklist as source for the relation + Dataset relNoSource = relation + .joinWith(blackList, relation.col("source").equalTo(blackList.col("value")), "left") + .filter((FilterFunction>) t2 -> t2._2() == null) + .map((MapFunction, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class)); + + // select only the relation not matching any pid in the blacklist as target of the relation + relNoSource + .joinWith(blackList, relNoSource.col("target").equalTo(blackList.col("value")), "left") + .filter((FilterFunction>) t2 -> t2._2() == null) + .map((MapFunction, Relation>) t2 -> t2._1(), Encoders.bean(Relation.class)) + .toJavaRDD() + .map(p -> new AtomicAction(p.getClass(), p)) + .mapToPair( + aa -> new Tuple2<>(new Text(aa.getClazz().getCanonicalName()), + new Text(OBJECT_MAPPER.writeValueAsString(aa)))) + .saveAsHadoopFile(outputPath, Text.class, Text.class, SequenceFileOutputFormat.class, BZip2Codec.class); + ; + + } + + private static Dataset readBlackList(SparkSession spark, String inputPath) { + + return spark + .read() + .json(inputPath) + .select("doi"); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java index 98caa1741..f63bfcb48 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/CollectorWorker.java @@ -22,9 +22,11 @@ import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; import eu.dnetlib.dhp.collection.plugin.base.BaseCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.file.FileCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.file.FileGZipCollectorPlugin; +import eu.dnetlib.dhp.collection.plugin.gtr2.Gtr2PublicationsCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.mongodb.MDStoreCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.mongodb.MongoDbDumpCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.oai.OaiCollectorPlugin; +import eu.dnetlib.dhp.collection.plugin.osf.OsfPreprintsCollectorPlugin; import eu.dnetlib.dhp.collection.plugin.rest.RestCollectorPlugin; import eu.dnetlib.dhp.common.aggregation.AggregatorReport; import eu.dnetlib.dhp.common.collection.CollectorException; @@ -58,7 +60,7 @@ public class CollectorWorker extends ReportingJob { public void collect() throws UnknownCollectorPluginException, CollectorException, IOException { - final String outputPath = mdStoreVersion.getHdfsPath() + SEQUENCE_FILE_NAME; + final String outputPath = this.mdStoreVersion.getHdfsPath() + SEQUENCE_FILE_NAME; log.info("outputPath path is {}", outputPath); final CollectorPlugin plugin = getCollectorPlugin(); @@ -68,36 +70,36 @@ public class CollectorWorker extends ReportingJob { try (SequenceFile.Writer writer = SequenceFile .createWriter( - fileSystem.getConf(), - SequenceFile.Writer.file(new Path(outputPath)), - SequenceFile.Writer.keyClass(IntWritable.class), - SequenceFile.Writer.valueClass(Text.class), + this.fileSystem.getConf(), SequenceFile.Writer.file(new Path(outputPath)), SequenceFile.Writer + .keyClass(IntWritable.class), + SequenceFile.Writer + .valueClass(Text.class), SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new DeflateCodec()))) { final IntWritable key = new IntWritable(counter.get()); final Text value = new Text(); plugin - .collect(api, report) - .forEach( - content -> { - key.set(counter.getAndIncrement()); - value.set(content); - try { - writer.append(key, value); - } catch (Throwable e) { - throw new RuntimeException(e); - } - }); - } catch (Throwable e) { - report.put(e.getClass().getName(), e.getMessage()); + .collect(this.api, this.report) + .forEach(content -> { + key.set(counter.getAndIncrement()); + value.set(content); + try { + writer.append(key, value); + } catch (final Throwable e) { + throw new RuntimeException(e); + } + }); + } catch (final Throwable e) { + this.report.put(e.getClass().getName(), e.getMessage()); throw new CollectorException(e); } finally { shutdown(); - report.ongoing(counter.longValue(), counter.longValue()); + this.report.ongoing(counter.longValue(), counter.longValue()); } } - private void scheduleReport(AtomicInteger counter) { + private void scheduleReport(final AtomicInteger counter) { schedule(new ReporterCallback() { + @Override public Long getCurrent() { return counter.longValue(); @@ -112,33 +114,37 @@ public class CollectorWorker extends ReportingJob { private CollectorPlugin getCollectorPlugin() throws UnknownCollectorPluginException { - switch (CollectorPlugin.NAME.valueOf(api.getProtocol())) { + switch (CollectorPlugin.NAME.valueOf(this.api.getProtocol())) { case oai: - return new OaiCollectorPlugin(clientParams); + return new OaiCollectorPlugin(this.clientParams); case rest_json2xml: - return new RestCollectorPlugin(clientParams); + return new RestCollectorPlugin(this.clientParams); case file: - return new FileCollectorPlugin(fileSystem); + return new FileCollectorPlugin(this.fileSystem); case fileGzip: - return new FileGZipCollectorPlugin(fileSystem); + return new FileGZipCollectorPlugin(this.fileSystem); case baseDump: return new BaseCollectorPlugin(this.fileSystem); + case gtr2Publications: + return new Gtr2PublicationsCollectorPlugin(this.clientParams); + case osfPreprints: + return new OsfPreprintsCollectorPlugin(this.clientParams); case other: final CollectorPlugin.NAME.OTHER_NAME plugin = Optional - .ofNullable(api.getParams().get("other_plugin_type")) + .ofNullable(this.api.getParams().get("other_plugin_type")) .map(CollectorPlugin.NAME.OTHER_NAME::valueOf) .orElseThrow(() -> new IllegalArgumentException("invalid other_plugin_type")); switch (plugin) { case mdstore_mongodb_dump: - return new MongoDbDumpCollectorPlugin(fileSystem); + return new MongoDbDumpCollectorPlugin(this.fileSystem); case mdstore_mongodb: return new MDStoreCollectorPlugin(); default: throw new UnknownCollectorPluginException("plugin is not managed: " + plugin); } default: - throw new UnknownCollectorPluginException("protocol is not managed: " + api.getProtocol()); + throw new UnknownCollectorPluginException("protocol is not managed: " + this.api.getProtocol()); } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Author.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Author.java index df87e4333..a1545ebfe 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Author.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Author.java @@ -20,6 +20,9 @@ public class Author extends ORCIDItem { private String lastModifiedDate; + public Author() { + } + public String getBiography() { return biography; } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/ORCIDItem.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/ORCIDItem.java index 6bc47bc26..419823cb1 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/ORCIDItem.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/ORCIDItem.java @@ -11,4 +11,7 @@ public class ORCIDItem { public void setOrcid(String orcid) { this.orcid = orcid; } + + public ORCIDItem() { + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Work.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Work.java index 670170323..a8683aaaf 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Work.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/orcid/model/Work.java @@ -32,4 +32,6 @@ public class Work extends ORCIDItem { pids.add(pid); } + public Work() { + } } diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java index 97d2d2585..72e691579 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/CollectorPlugin.java @@ -11,7 +11,7 @@ public interface CollectorPlugin { enum NAME { - oai, other, rest_json2xml, file, fileGzip, baseDump; + oai, other, rest_json2xml, file, fileGzip, baseDump, gtr2Publications, osfPreprints; public enum OTHER_NAME { mdstore_mongodb_dump, mdstore_mongodb diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsCollectorPlugin.java new file mode 100644 index 000000000..15fdd8280 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsCollectorPlugin.java @@ -0,0 +1,43 @@ + +package eu.dnetlib.dhp.collection.plugin.gtr2; + +import java.util.Iterator; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +import eu.dnetlib.dhp.collection.ApiDescriptor; +import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; + +public class Gtr2PublicationsCollectorPlugin implements CollectorPlugin { + + private final HttpClientParams clientParams; + + public Gtr2PublicationsCollectorPlugin(final HttpClientParams clientParams) { + this.clientParams = clientParams; + } + + @Override + public Stream collect(final ApiDescriptor api, final AggregatorReport report) throws CollectorException { + + final String baseUrl = api.getBaseUrl(); + final String startPage = api.getParams().get("startPage"); + final String endPage = api.getParams().get("endPage"); + final String fromDate = api.getParams().get("fromDate"); + + if ((fromDate != null) && !fromDate.matches("\\d{4}-\\d{2}-\\d{2}")) { + throw new CollectorException("Invalid date (YYYY-MM-DD): " + fromDate); + } + + final Iterator iterator = new Gtr2PublicationsIterator(baseUrl, fromDate, startPage, endPage, + this.clientParams); + final Spliterator spliterator = Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED); + + return StreamSupport.stream(spliterator, false); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIterator.java new file mode 100644 index 000000000..5b8f48680 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIterator.java @@ -0,0 +1,215 @@ + +package eu.dnetlib.dhp.collection.plugin.gtr2; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Queue; +import java.util.function.Function; + +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.math.NumberUtils; +import org.dom4j.Document; +import org.dom4j.DocumentException; +import org.dom4j.DocumentHelper; +import org.dom4j.Element; +import org.joda.time.DateTime; +import org.joda.time.format.DateTimeFormat; +import org.joda.time.format.DateTimeFormatter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; +import eu.dnetlib.dhp.common.collection.HttpConnector2; + +public class Gtr2PublicationsIterator implements Iterator { + + public static final int PAGE_SIZE = 20; + + private static final Logger log = LoggerFactory.getLogger(Gtr2PublicationsIterator.class); + + private final HttpConnector2 connector; + private static final DateTimeFormatter simpleDateTimeFormatter = DateTimeFormat.forPattern("yyyy-MM-dd"); + + private static final int MAX_ATTEMPTS = 10; + + private final String baseUrl; + private int currPage; + private int endPage; + private boolean incremental = false; + private DateTime fromDate; + + private final Map cache = new HashMap<>(); + + private final Queue queue = new LinkedList<>(); + + private String nextElement; + + public Gtr2PublicationsIterator(final String baseUrl, final String fromDate, final String startPage, + final String endPage, + final HttpClientParams clientParams) + throws CollectorException { + + this.baseUrl = baseUrl; + this.currPage = NumberUtils.toInt(startPage, 1); + this.endPage = NumberUtils.toInt(endPage, Integer.MAX_VALUE); + this.incremental = StringUtils.isNotBlank(fromDate); + this.connector = new HttpConnector2(clientParams); + + if (this.incremental) { + this.fromDate = parseDate(fromDate); + } + + prepareNextElement(); + } + + @Override + public boolean hasNext() { + return this.nextElement != null; + } + + @Override + public String next() { + try { + return this.nextElement; + } finally { + prepareNextElement(); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + private void prepareNextElement() { + while ((this.currPage <= this.endPage) && this.queue.isEmpty()) { + log.debug("FETCHING PAGE + " + this.currPage + "/" + this.endPage); + this.queue.addAll(fetchPage(this.currPage++)); + } + this.nextElement = this.queue.poll(); + } + + private List fetchPage(final int pageNumber) { + + final List res = new ArrayList<>(); + try { + final Document doc = loadURL(cleanURL(this.baseUrl + "/outcomes/publications?p=" + pageNumber), 0); + + if (this.endPage == Integer.MAX_VALUE) { + this.endPage = NumberUtils.toInt(doc.valueOf("/*/@*[local-name() = 'totalPages']")); + } + + for (final Object po : doc.selectNodes("//*[local-name() = 'publication']")) { + final Element mainEntity = (Element) ((Element) po).detach(); + + if (filterIncremental(mainEntity)) { + res.add(expandMainEntity(mainEntity)); + } else { + log.debug("Skipped entity"); + } + + } + } catch (final Throwable e) { + log.error("Exception fetching page " + pageNumber, e); + throw new RuntimeException("Exception fetching page " + pageNumber, e); + } + + return res; + } + + private void addLinkedEntities(final Element master, final String relType, final Element newRoot, + final Function mapper) { + + for (final Object o : master.selectNodes(".//*[local-name()='link']")) { + final String rel = ((Element) o).valueOf("@*[local-name()='rel']"); + final String href = ((Element) o).valueOf("@*[local-name()='href']"); + + if (relType.equals(rel) && StringUtils.isNotBlank(href)) { + final String cacheKey = relType + "#" + href; + if (this.cache.containsKey(cacheKey)) { + try { + log.debug(" * from cache (" + relType + "): " + href); + newRoot.add(DocumentHelper.parseText(this.cache.get(cacheKey)).getRootElement()); + } catch (final DocumentException e) { + log.error("Error retrieving cache element: " + cacheKey, e); + throw new RuntimeException("Error retrieving cache element: " + cacheKey, e); + } + } else { + final Document doc = loadURL(cleanURL(href), 0); + final Element elem = mapper.apply(doc); + newRoot.add(elem); + this.cache.put(cacheKey, elem.asXML()); + } + + } + } + } + + private boolean filterIncremental(final Element e) { + if (!this.incremental || isAfter(e.valueOf("@*[local-name() = 'created']"), this.fromDate) + || isAfter(e.valueOf("@*[local-name() = 'updated']"), this.fromDate)) { + return true; + } + return false; + } + + private String expandMainEntity(final Element mainEntity) { + final Element newRoot = DocumentHelper.createElement("doc"); + newRoot.add(mainEntity); + addLinkedEntities(mainEntity, "PROJECT", newRoot, this::asProjectElement); + return DocumentHelper.createDocument(newRoot).asXML(); + } + + private Element asProjectElement(final Document doc) { + final Element newOrg = DocumentHelper.createElement("project"); + newOrg.addElement("id").setText(doc.valueOf("/*/@*[local-name()='id']")); + newOrg + .addElement("code") + .setText(doc.valueOf("//*[local-name()='identifier' and @*[local-name()='type'] = 'RCUK']")); + newOrg.addElement("title").setText(doc.valueOf("//*[local-name()='title']")); + return newOrg; + } + + private static String cleanURL(final String url) { + String cleaned = url; + if (cleaned.contains("gtr.gtr")) { + cleaned = cleaned.replace("gtr.gtr", "gtr"); + } + if (cleaned.startsWith("http://")) { + cleaned = cleaned.replaceFirst("http://", "https://"); + } + return cleaned; + } + + private Document loadURL(final String cleanUrl, final int attempt) { + try { + log.debug(" * Downloading Url: " + cleanUrl); + final byte[] bytes = this.connector.getInputSource(cleanUrl).getBytes("UTF-8"); + return DocumentHelper.parseText(new String(bytes)); + } catch (final Throwable e) { + log.error("Error dowloading url: " + cleanUrl + ", attempt = " + attempt, e); + if (attempt >= MAX_ATTEMPTS) { + throw new RuntimeException("Error dowloading url: " + cleanUrl, e); + } + try { + Thread.sleep(60000); // I wait for a minute + } catch (final InterruptedException e1) { + throw new RuntimeException("Error dowloading url: " + cleanUrl, e); + } + return loadURL(cleanUrl, attempt + 1); + } + } + + private DateTime parseDate(final String s) { + return DateTime.parse(s.contains("T") ? s.substring(0, s.indexOf("T")) : s, simpleDateTimeFormatter); + } + + private boolean isAfter(final String d, final DateTime fromDate) { + return StringUtils.isNotBlank(d) && parseDate(d).isAfter(fromDate); + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPlugin.java new file mode 100644 index 000000000..b0787eb45 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPlugin.java @@ -0,0 +1,52 @@ + +package eu.dnetlib.dhp.collection.plugin.osf; + +import java.util.Optional; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.math.NumberUtils; + +import eu.dnetlib.dhp.collection.ApiDescriptor; +import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; + +public class OsfPreprintsCollectorPlugin implements CollectorPlugin { + + public static final int PAGE_SIZE_VALUE_DEFAULT = 100; + + private final HttpClientParams clientParams; + + public OsfPreprintsCollectorPlugin(final HttpClientParams clientParams) { + this.clientParams = clientParams; + } + + @Override + public Stream collect(final ApiDescriptor api, final AggregatorReport report) throws CollectorException { + final String baseUrl = api.getBaseUrl(); + + final int pageSize = Optional + .ofNullable(api.getParams().get("pageSize")) + .filter(StringUtils::isNotBlank) + .map(s -> NumberUtils.toInt(s, PAGE_SIZE_VALUE_DEFAULT)) + .orElse(PAGE_SIZE_VALUE_DEFAULT); + + if (StringUtils.isBlank(baseUrl)) { + throw new CollectorException("Param 'baseUrl' is null or empty"); + } + + final OsfPreprintsIterator it = new OsfPreprintsIterator(baseUrl, pageSize, getClientParams()); + + return StreamSupport + .stream(Spliterators.spliteratorUnknownSize(it, Spliterator.ORDERED), false); + } + + public HttpClientParams getClientParams() { + return this.clientParams; + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsIterator.java new file mode 100644 index 000000000..76adba1a8 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsIterator.java @@ -0,0 +1,151 @@ + +package eu.dnetlib.dhp.collection.plugin.osf; + +import java.util.Iterator; +import java.util.Queue; +import java.util.concurrent.PriorityBlockingQueue; + +import org.apache.commons.lang3.StringUtils; +import org.dom4j.Document; +import org.dom4j.DocumentHelper; +import org.dom4j.Element; +import org.dom4j.Node; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.collection.plugin.utils.JsonUtils; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; +import eu.dnetlib.dhp.common.collection.HttpConnector2; + +public class OsfPreprintsIterator implements Iterator { + + private static final Logger log = LoggerFactory.getLogger(OsfPreprintsIterator.class); + + private static final int MAX_ATTEMPTS = 5; + + private final HttpClientParams clientParams; + + private final String baseUrl; + private final int pageSize; + + private String currentUrl; + + private final Queue recordQueue = new PriorityBlockingQueue<>(); + + public OsfPreprintsIterator( + final String baseUrl, + final int pageSize, + final HttpClientParams clientParams) { + + this.clientParams = clientParams; + this.baseUrl = baseUrl; + this.pageSize = pageSize; + + initQueue(); + } + + private void initQueue() { + this.currentUrl = this.baseUrl + "?filter:is_published:d=true&format=json&page[size]=" + this.pageSize; + + log.info("REST calls starting with {}", this.currentUrl); + } + + @Override + public boolean hasNext() { + synchronized (this.recordQueue) { + while (this.recordQueue.isEmpty() && StringUtils.isNotBlank(this.currentUrl) + && this.currentUrl.startsWith("http")) { + try { + this.currentUrl = downloadPage(this.currentUrl); + } catch (final CollectorException e) { + log.debug("CollectorPlugin.next()-Exception: {}", e); + throw new RuntimeException(e); + } + } + + if (!this.recordQueue.isEmpty()) { + return true; + } + + return false; + } + } + + @Override + public String next() { + synchronized (this.recordQueue) { + return this.recordQueue.poll(); + } + } + + private String downloadPage(final String url) throws CollectorException { + + final Document doc = downloadUrl(url, 0); + + for (final Object o : doc.selectNodes("/*/data")) { + + final Element n = (Element) ((Element) o).detach(); + + final Element group = DocumentHelper.createElement("group"); + group.addAttribute("id", n.valueOf("./id")); + + group.addElement("preprint").add(n); + + for (final Object o1 : n.selectNodes(".//contributors//href")) { + final String href = ((Node) o1).getText(); + if (StringUtils.isNotBlank(href) && href.startsWith("http")) { + final Document doc1 = downloadUrl(href, 0); + group.addElement("contributors").add(doc1.getRootElement().detach()); + } + } + for (final Object o1 : n.selectNodes(".//primary_file//href")) { + final String href = ((Node) o1).getText(); + if (StringUtils.isNotBlank(href) && href.startsWith("http")) { + final Document doc1 = downloadUrl(href, 0); + group.addElement("primary_file").add(doc1.getRootElement().detach()); + } + } + + this.recordQueue.add(DocumentHelper.createDocument(group).asXML()); + } + + return doc.valueOf("/*/links/next"); + + } + + private Document downloadUrl(final String url, final int attempt) throws CollectorException { + if (attempt > MAX_ATTEMPTS) { + throw new CollectorException("Max Number of attempts reached, url:" + url); + } + + if (attempt > 0) { + final int delay = (attempt * 5000); + log.debug("Attempt {} with delay {}", attempt, delay); + try { + Thread.sleep(delay); + } catch (final InterruptedException e) { + new CollectorException(e); + } + } + + try { + log.info("requesting URL [{}]", url); + + final HttpConnector2 connector = new HttpConnector2(this.clientParams); + + final String json = connector.getInputSource(url); + final String xml = JsonUtils.convertToXML(json); + + return DocumentHelper.parseText(xml); + + } catch (final Throwable e) { + log.warn(e.getMessage(), e); + if ((e instanceof CollectorException) && e.getMessage().contains("401")) { + final Element root = DocumentHelper.createElement("error_401_authorization_required"); + return DocumentHelper.createDocument(root); + } + return downloadUrl(url, attempt + 1); + } + } +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiCollectorPlugin.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiCollectorPlugin.java new file mode 100644 index 000000000..2603ecab1 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiCollectorPlugin.java @@ -0,0 +1,76 @@ + +package eu.dnetlib.dhp.collection.plugin.researchfi; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.http.NameValuePair; +import org.apache.http.client.entity.UrlEncodedFormEntity; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.message.BasicNameValuePair; +import org.json.JSONObject; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.collection.ApiDescriptor; +import eu.dnetlib.dhp.collection.plugin.CollectorPlugin; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; + +public class ResearchFiCollectorPlugin implements CollectorPlugin { + + private static final Logger log = LoggerFactory.getLogger(ResearchFiCollectorPlugin.class); + + @Override + public Stream collect(final ApiDescriptor api, final AggregatorReport report) + throws CollectorException { + + final String authUrl = api.getParams().get("auth_url"); + final String clientId = api.getParams().get("auth_client_id"); + final String clientSecret = api.getParams().get("auth_client_secret"); + + final String authToken = authenticate(authUrl, clientId, clientSecret); + + final Iterator iter = new ResearchFiIterator(api.getBaseUrl(), authToken); + + return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iter, Spliterator.ORDERED), false); + } + + private String authenticate(final String authUrl, final String clientId, final String clientSecret) + throws CollectorException { + try (final CloseableHttpClient client = HttpClients.createDefault()) { + final HttpPost req = new HttpPost(authUrl); + final List params = new ArrayList<>(); + params.add(new BasicNameValuePair("grant_type", "client_credentials")); + params.add(new BasicNameValuePair("client_id", clientId)); + params.add(new BasicNameValuePair("client_secret", clientSecret)); + + req.setEntity(new UrlEncodedFormEntity(params, "UTF-8")); + + try (final CloseableHttpResponse response = client.execute(req)) { + final String content = IOUtils.toString(response.getEntity().getContent()); + final JSONObject obj = new JSONObject(content); + final String token = obj.getString("access_token"); + if (StringUtils.isNotBlank(token)) { + return token; + } + } + } catch (final Throwable e) { + log.warn("Error obtaining access token", e); + throw new CollectorException("Error obtaining access token", e); + } + throw new CollectorException("Access token is missing"); + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiIterator.java new file mode 100644 index 000000000..269a89f71 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiIterator.java @@ -0,0 +1,117 @@ + +package eu.dnetlib.dhp.collection.plugin.researchfi; + +import java.util.Iterator; +import java.util.Queue; +import java.util.concurrent.PriorityBlockingQueue; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.math.NumberUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.http.Header; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.json.JSONArray; + +import eu.dnetlib.dhp.collection.plugin.utils.JsonUtils; +import eu.dnetlib.dhp.common.collection.CollectorException; + +public class ResearchFiIterator implements Iterator { + + private static final Log log = LogFactory.getLog(ResearchFiIterator.class); + + private static final int PAGE_SIZE = 100; + + private final String baseUrl; + private final String authToken; + private int currPage; + private int nPages; + + private final Queue queue = new PriorityBlockingQueue<>(); + + public ResearchFiIterator(final String baseUrl, final String authToken) { + this.baseUrl = baseUrl; + this.authToken = authToken; + this.currPage = 0; + this.nPages = 0; + } + + private void verifyStarted() { + if (this.currPage == 0) { + try { + nextCall(); + } catch (final CollectorException e) { + throw new IllegalStateException(e); + } + } + } + + @Override + public boolean hasNext() { + synchronized (this.queue) { + verifyStarted(); + return !this.queue.isEmpty(); + } + } + + @Override + public String next() { + synchronized (this.queue) { + verifyStarted(); + final String res = this.queue.poll(); + while (this.queue.isEmpty() && (this.currPage < this.nPages)) { + try { + nextCall(); + } catch (final CollectorException e) { + throw new IllegalStateException(e); + } + } + return res; + } + } + + private void nextCall() throws CollectorException { + + this.currPage += 1; + + final String url; + if (!this.baseUrl.contains("?")) { + url = String.format("%s?PageNumber=%d&PageSize=%d", this.baseUrl, this.currPage, PAGE_SIZE); + } else if (!this.baseUrl.contains("PageSize=")) { + url = String.format("%s&PageNumber=%d&PageSize=%d", this.baseUrl, this.currPage, PAGE_SIZE); + } else { + url = String.format("%s&PageNumber=%d", this.baseUrl, this.currPage); + } + log.info("Calling url: " + url); + + try (final CloseableHttpClient client = HttpClients.createDefault()) { + + final HttpGet req = new HttpGet(url); + req.addHeader("Authorization", "Bearer " + this.authToken); + try (final CloseableHttpResponse response = client.execute(req)) { + for (final Header header : response.getAllHeaders()) { + log.debug("HEADER: " + header.getName() + " = " + header.getValue()); + if ("x-page-count".equals(header.getName())) { + final int totalPages = NumberUtils.toInt(header.getValue()); + if (this.nPages != totalPages) { + this.nPages = NumberUtils.toInt(header.getValue()); + log.info("Total pages: " + totalPages); + } + } + } + + final String content = IOUtils.toString(response.getEntity().getContent()); + final JSONArray jsonArray = new JSONArray(content); + + jsonArray.forEach(obj -> this.queue.add(JsonUtils.convertToXML(obj.toString()))); + } + } catch (final Throwable e) { + log.warn("Error calling url: " + url, e); + throw new CollectorException("Error calling url: " + url, e); + } + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/utils/XMLIterator.java b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/utils/XMLIterator.java index e05fe263a..7e5c5e3c3 100644 --- a/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/utils/XMLIterator.java +++ b/dhp-workflows/dhp-aggregation/src/main/java/eu/dnetlib/dhp/collection/plugin/utils/XMLIterator.java @@ -8,7 +8,10 @@ import java.io.StringWriter; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; import java.nio.charset.CodingErrorAction; +import java.util.Arrays; import java.util.Iterator; +import java.util.List; +import java.util.stream.Collectors; import javax.xml.stream.XMLEventFactory; import javax.xml.stream.XMLEventReader; @@ -19,6 +22,7 @@ import javax.xml.stream.XMLStreamException; import javax.xml.stream.events.StartElement; import javax.xml.stream.events.XMLEvent; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -58,13 +62,23 @@ public class XMLIterator implements Iterator { private String element; + private List elements; + private InputStream inputStream; public XMLIterator(final String element, final InputStream inputStream) { super(); this.element = element; + if (element.contains(",")) { + elements = Arrays + .stream(element.split(",")) + .filter(StringUtils::isNoneBlank) + .map(String::toLowerCase) + .collect(Collectors.toList()); + } this.inputStream = inputStream; this.parser = getParser(); + try { this.current = findElement(parser); } catch (XMLStreamException e) { @@ -113,7 +127,7 @@ public class XMLIterator implements Iterator { final XMLEvent event = parser.nextEvent(); // TODO: replace with depth tracking instead of close tag tracking. - if (event.isEndElement() && event.asEndElement().getName().getLocalPart().equals(element)) { + if (event.isEndElement() && isCheckTag(event.asEndElement().getName().getLocalPart())) { writer.add(event); break; } @@ -142,18 +156,16 @@ public class XMLIterator implements Iterator { XMLEvent peek = parser.peek(); if (peek != null && peek.isStartElement()) { String name = peek.asStartElement().getName().getLocalPart(); - if (element.equals(name)) { + if (isCheckTag(name)) return peek; - } } while (parser.hasNext()) { - final XMLEvent event = parser.nextEvent(); + XMLEvent event = parser.nextEvent(); if (event != null && event.isStartElement()) { String name = event.asStartElement().getName().getLocalPart(); - if (element.equals(name)) { + if (isCheckTag(name)) return event; - } } } return null; @@ -161,12 +173,31 @@ public class XMLIterator implements Iterator { private XMLEventReader getParser() { try { - return inputFactory.get().createXMLEventReader(sanitize(inputStream)); + XMLInputFactory xif = inputFactory.get(); + xif.setProperty(XMLInputFactory.SUPPORT_DTD, false); + return xif.createXMLEventReader(sanitize(inputStream)); } catch (XMLStreamException e) { throw new RuntimeException(e); } } + private boolean isCheckTag(final String tagName) { + if (elements != null) { + final String found = elements + .stream() + .filter(e -> e.equalsIgnoreCase(tagName)) + .findFirst() + .orElse(null); + if (found != null) + return true; + } else { + if (element.equalsIgnoreCase(tagName)) { + return true; + } + } + return false; + } + private Reader sanitize(final InputStream in) { final CharsetDecoder charsetDecoder = Charset.forName(UTF_8).newDecoder(); charsetDecoder.onMalformedInput(CodingErrorAction.REPLACE); diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json index f80d9e446..685d2e50e 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/bipaffiliations/input_actionset_parameter.json @@ -28,7 +28,13 @@ "paramLongName": "dataciteInputPath", "paramDescription": "the path to get the input data from Datacite", "paramRequired": true - }, + },{ + "paramName": "wip", + "paramLongName": "webCrawlInputPath", + "paramDescription": "the path to get the input data from Web Crawl", + "paramRequired": true +} +, { "paramName": "wip", "paramLongName": "webCrawlInputPath", diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/remap_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/remap_parameters.json index 7ebaddfdf..301cb5c76 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/remap_parameters.json +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/opencitations/remap_parameters.json @@ -16,10 +16,11 @@ "paramLongName": "isSparkSessionManged", "paramDescription": "the hdfs name node", "paramRequired": false - },{ - "paramName": "nn", - "paramLongName": "nameNode", - "paramDescription": "the hdfs name node", - "paramRequired": true -} + }, + { + "paramName": "nn", + "paramLongName": "nameNode", + "paramDescription": "the hdfs name node", + "paramRequired": true + } ] diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/personentity/as_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/personentity/as_parameters.json new file mode 100644 index 000000000..5175552e7 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/personentity/as_parameters.json @@ -0,0 +1,25 @@ +[ + { + "paramName": "ip", + "paramLongName": "inputPath", + "paramDescription": "the zipped opencitations file", + "paramRequired": true + }, + { + "paramName": "op", + "paramLongName": "outputPath", + "paramDescription": "the working path", + "paramRequired": true + }, + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "the hdfs name node", + "paramRequired": false + }, { + "paramName": "wd", + "paramLongName": "workingDir", + "paramDescription": "the hdfs name node", + "paramRequired": false +} +] diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/personentity/job.properties b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/personentity/job.properties new file mode 100644 index 000000000..d2269718c --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/personentity/job.properties @@ -0,0 +1,2 @@ +inputPath=/data/orcid_2023/tables/ +outputPath=/user/miriam.baglioni/peopleAS \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/personentity/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/personentity/oozie_app/config-default.xml new file mode 100644 index 000000000..d262cb6e0 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/personentity/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/personentity/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/personentity/oozie_app/workflow.xml new file mode 100644 index 000000000..166e7bb9c --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/personentity/oozie_app/workflow.xml @@ -0,0 +1,111 @@ + + + + + inputPath + inputPath + + + outputPath + the path where to store the actionset + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + + + + + + + + + + + + yarn + cluster + Produces the ActionSet for Person entity and relevant relations + eu.dnetlib.dhp.actionmanager.personentity.ExtractPerson + dhp-aggregation-${projectVersion}.jar + + --executor-cores=4 + --executor-memory=4G + --driver-memory=${sparkDriverMemory} + --conf spark.executor.memoryOverhead=5G + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + --conf spark.sql.shuffle.partitions=15000 + + --inputPath${inputPath} + --outputPath${outputPath} + --workingDir${workingDir} + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/sdgnodoi/as_parameters.json b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/sdgnodoi/as_parameters.json new file mode 100644 index 000000000..3f056edf7 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/sdgnodoi/as_parameters.json @@ -0,0 +1,20 @@ +[ + { + "paramName": "sp", + "paramLongName": "sourcePath", + "paramDescription": "the zipped opencitations file", + "paramRequired": true + }, + { + "paramName": "op", + "paramLongName": "outputPath", + "paramDescription": "the working path", + "paramRequired": true + }, + { + "paramName": "issm", + "paramLongName": "isSparkSessionManaged", + "paramDescription": "the hdfs name node", + "paramRequired": false + } +] diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/sdgnodoi/oozie_app/config-default.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/sdgnodoi/oozie_app/config-default.xml new file mode 100644 index 000000000..d262cb6e0 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/sdgnodoi/oozie_app/config-default.xml @@ -0,0 +1,30 @@ + + + jobTracker + yarnRM + + + nameNode + hdfs://nameservice1 + + + oozie.use.system.libpath + true + + + hiveMetastoreUris + thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083 + + + hiveJdbcUrl + jdbc:hive2://iis-cdh5-test-m3.ocean.icm.edu.pl:10000 + + + hiveDbName + openaire + + + oozie.launcher.mapreduce.user.classpath.first + true + + diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/sdgnodoi/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/sdgnodoi/oozie_app/workflow.xml new file mode 100644 index 000000000..82144d0d6 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/sdgnodoi/oozie_app/workflow.xml @@ -0,0 +1,125 @@ + + + + + sdgPath + the input path of the resources to be extended + + + outputPath + the path where to store the actionset + + + sparkDriverMemory + memory for driver process + + + sparkExecutorMemory + memory for individual executor + + + sparkExecutorCores + number of cores used by single executor + + + oozieActionShareLibForSpark2 + oozie action sharelib for spark 2.* + + + spark2ExtraListeners + com.cloudera.spark.lineage.NavigatorAppListener + spark 2.* extra listeners classname + + + spark2SqlQueryExecutionListeners + com.cloudera.spark.lineage.NavigatorQueryListener + spark 2.* sql query execution listeners classname + + + spark2YarnHistoryServerAddress + spark 2.* yarn history server address + + + spark2EventLogDir + spark 2.* event log dir location + + + + + ${jobTracker} + ${nameNode} + + + mapreduce.job.queuename + ${queueName} + + + oozie.launcher.mapred.job.queue.name + ${oozieLauncherQueueName} + + + oozie.action.sharelib.for.spark + ${oozieActionShareLibForSpark2} + + + + + + + + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + + + yarn + cluster + Produces the results from FOS + eu.dnetlib.dhp.actionmanager.createunresolvedentities.PrepareSDGSparkJob + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sdgPath} + --outputPath${workingDir}/prepared + --distributeDoifalse + + + + + + + + + + yarn + cluster + Save the action set grouping results with the same id + eu.dnetlib.dhp.actionmanager.sdgnodoi.CreateActionSetSparkJob + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${workingDir}/prepared/sdg + --outputPath${outputPath} + + + + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/webcrawl/job.properties b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/webcrawl/job.properties index d7bd709fc..641e72610 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/webcrawl/job.properties +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/webcrawl/job.properties @@ -1,3 +1,11 @@ -sourcePath=/user/miriam.baglioni/openalex-snapshot/data/works/ -outputPath=/tmp/miriam/webcrawlComplete/ -blackListPath=/user/miriam.baglioni/openalex-blackList +#PROPERTIES TO CREATE THE ACTION SET +#sourcePath=/user/miriam.baglioni/openalex-snapshot/data/works/ +#outputPath=/tmp/miriam/webcrawlComplete/ +#blackListPath=/user/miriam.baglioni/openalex-blackList +#resumeFrom=create + +#PROPERTIES TO REMOVE FROM THE ACTION SET +sourcePath=/var/lib/dnet/actionManager_PROD/webcrawl/rawset_28247629-468b-478e-9a42-bc540877125d_1718121542061/ +outputPath=/tmp/miriam/webcrawlRemoved/ +blackListPath=/user/miriam.baglioni/oalexBlackListNormalized +resumeFrom=remove \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/webcrawl/oozie_app/workflow.xml b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/webcrawl/oozie_app/workflow.xml index b9394c7e6..ccf34c557 100644 --- a/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/webcrawl/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-aggregation/src/main/resources/eu/dnetlib/dhp/actionmanager/webcrawl/oozie_app/workflow.xml @@ -20,12 +20,19 @@ - + Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}] + + + ${wf:conf('resumeFrom') eq 'create'} + + + + yarn @@ -50,5 +57,30 @@ + + + + yarn + cluster + Removes some relations found to be wrong from the AS + eu.dnetlib.dhp.actionmanager.webcrawl.RemoveRelationFromActionSet + dhp-aggregation-${projectVersion}.jar + + --executor-memory=${sparkExecutorMemory} + --executor-cores=${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + --conf spark.sql.warehouse.dir=${sparkSqlWarehouseDir} + + --sourcePath${sourcePath} + --outputPath${outputPath} + --blackListPath${blackListPath} + + + + \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala index d74ffcc58..4bd6bcc09 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/collection/crossref/Crossref2Oaf.scala @@ -517,8 +517,10 @@ case object Crossref2Oaf { ) } - if(doi.startsWith("10.3410") || doi.startsWith("10.12703")) - instance.setHostedby(OafMapperUtils.keyValue(OafMapperUtils.createOpenaireId(10, "openaire____::H1Connect", true),"H1Connect")) + if (doi.startsWith("10.3410") || doi.startsWith("10.12703")) + instance.setHostedby( + OafMapperUtils.keyValue(OafMapperUtils.createOpenaireId(10, "openaire____::H1Connect", true), "H1Connect") + ) instance.setAccessright( decideAccessRight(instance.getLicense, result.getDateofacceptance.getValue) @@ -904,7 +906,11 @@ case object Crossref2Oaf { val targetId = getProjectId("cihr________", "1e5e62235d094afd01cd56e65112fc63") queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) - +// Added mapping for DFG + case "10.13039/501100001659" => + val targetId = getProjectId("dfgf________", "1e5e62235d094afd01cd56e65112fc63") + queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) + queue += generateRelation(targetId, sourceId, ModelConstants.PRODUCES) case "10.13039/100020031" => val targetId = getProjectId("tara________", "1e5e62235d094afd01cd56e65112fc63") queue += generateRelation(sourceId, targetId, ModelConstants.IS_PRODUCED_BY) @@ -1041,6 +1047,7 @@ case object Crossref2Oaf { tp._1 match { case "electronic" => journal.setIssnOnline(tp._2) case "print" => journal.setIssnPrinted(tp._2) + case _ => } }) } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala index 639918151..11d087583 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/ebi/SparkCreateBaselineDataFrame.scala @@ -2,12 +2,9 @@ package eu.dnetlib.dhp.sx.bio.ebi import eu.dnetlib.dhp.application.ArgumentApplicationParser import eu.dnetlib.dhp.collection.CollectionUtils -import eu.dnetlib.dhp.common.Constants.{MDSTORE_DATA_PATH, MDSTORE_SIZE_PATH} import eu.dnetlib.dhp.common.vocabulary.VocabularyGroup -import eu.dnetlib.dhp.schema.mdstore.MDStoreVersion -import eu.dnetlib.dhp.schema.oaf.{Oaf, Result} +import eu.dnetlib.dhp.schema.oaf.Oaf import eu.dnetlib.dhp.sx.bio.pubmed._ -import eu.dnetlib.dhp.utils.DHPUtils.{MAPPER, writeHdfsFile} import eu.dnetlib.dhp.utils.ISLookupClientFactory import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration @@ -17,13 +14,13 @@ import org.apache.http.client.methods.HttpGet import org.apache.http.impl.client.HttpClientBuilder import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD -import org.apache.spark.sql.expressions.Aggregator import org.apache.spark.sql._ +import org.apache.spark.sql.expressions.Aggregator import org.slf4j.{Logger, LoggerFactory} -import java.io.InputStream -import scala.io.Source -import scala.xml.pull.XMLEventReader +import java.io.{ByteArrayInputStream, InputStream} +import java.nio.charset.Charset +import javax.xml.stream.XMLInputFactory object SparkCreateBaselineDataFrame { @@ -86,7 +83,7 @@ object SparkCreateBaselineDataFrame { if (response.getStatusLine.getStatusCode > 400) { tries -= 1 } else - return IOUtils.toString(response.getEntity.getContent) + return IOUtils.toString(response.getEntity.getContent, Charset.defaultCharset()) } catch { case e: Throwable => println(s"Error on requesting ${r.getURI}") @@ -158,7 +155,8 @@ object SparkCreateBaselineDataFrame { IOUtils.toString( SparkEBILinksToOaf.getClass.getResourceAsStream( "/eu/dnetlib/dhp/sx/bio/ebi/baseline_to_oaf_params.json" - ) + ), + Charset.defaultCharset() ) ) parser.parseArgument(args) @@ -167,15 +165,11 @@ object SparkCreateBaselineDataFrame { val workingPath = parser.get("workingPath") log.info("workingPath: {}", workingPath) - val mdstoreOutputVersion = parser.get("mdstoreOutputVersion") - log.info("mdstoreOutputVersion: {}", mdstoreOutputVersion) - - val cleanedMdStoreVersion = MAPPER.readValue(mdstoreOutputVersion, classOf[MDStoreVersion]) - val outputBasePath = cleanedMdStoreVersion.getHdfsPath - log.info("outputBasePath: {}", outputBasePath) + val targetPath = parser.get("targetPath") + log.info("targetPath: {}", targetPath) val hdfsServerUri = parser.get("hdfsServerUri") - log.info("hdfsServerUri: {}", hdfsServerUri) + log.info("hdfsServerUri: {}", targetPath) val skipUpdate = parser.get("skipUpdate") log.info("skipUpdate: {}", skipUpdate) @@ -201,10 +195,11 @@ object SparkCreateBaselineDataFrame { if (!"true".equalsIgnoreCase(skipUpdate)) { downloadBaseLineUpdate(s"$workingPath/baseline", hdfsServerUri) val k: RDD[(String, String)] = sc.wholeTextFiles(s"$workingPath/baseline", 2000) + val inputFactory = XMLInputFactory.newInstance val ds: Dataset[PMArticle] = spark.createDataset( k.filter(i => i._1.endsWith(".gz")) .flatMap(i => { - val xml = new XMLEventReader(Source.fromBytes(i._2.getBytes())) + val xml = inputFactory.createXMLEventReader(new ByteArrayInputStream(i._2.getBytes())) new PMParser(xml) }) ) @@ -223,11 +218,8 @@ object SparkCreateBaselineDataFrame { .map(a => PubMedToOaf.convert(a, vocabularies)) .as[Oaf] .filter(p => p != null), - s"$outputBasePath/$MDSTORE_DATA_PATH" + targetPath ) - val df = spark.read.text(s"$outputBasePath/$MDSTORE_DATA_PATH") - val mdStoreSize = df.count - writeHdfsFile(spark.sparkContext.hadoopConfiguration, s"$mdStoreSize", s"$outputBasePath/$MDSTORE_SIZE_PATH") } } diff --git a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala index 9102c12c4..fb941a461 100644 --- a/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala +++ b/dhp-workflows/dhp-aggregation/src/main/scala/eu/dnetlib/dhp/sx/bio/pubmed/PMParser.scala @@ -1,7 +1,8 @@ package eu.dnetlib.dhp.sx.bio.pubmed import scala.xml.MetaData -import scala.xml.pull.{EvElemEnd, EvElemStart, EvText, XMLEventReader} +import javax.xml.stream.XMLEventReader +import scala.xml.pull.{EvElemEnd, EvElemStart, EvText} /** @param xml */ diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java index ce116688a..0a4dfc00b 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/createunresolvedentities/ProduceTest.java @@ -15,10 +15,7 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java index 3b416caf2..ebde0ed0c 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/ReadCOCITest.java @@ -119,7 +119,9 @@ public class ReadCOCITest { workingDir.toString() + "/COCI", "-outputPath", workingDir.toString() + "/COCI_json/", - "-inputFile", "input1;input2;input3;input4;input5" + "-inputFile", "input1;input2;input3;input4;input5", + "-format", + "COCI" }); final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/RemapTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/RemapTest.java index 5fc732bcf..629141834 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/RemapTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/opencitations/RemapTest.java @@ -77,13 +77,13 @@ public class RemapTest { MapOCIdsInPids .main( new String[] { - "-isSparkSessionManged", + "--isSparkSessionManged", Boolean.FALSE.toString(), - "-inputPath", + "--inputPath", inputPath, - "-outputPath", + "--outputPath", workingDir.toString() + "/out/", - "-nameNode", "input1;input2;input3;input4;input5" + "--nameNode", "hdfs://localhost" }); } diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/person/CreatePersonAS.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/person/CreatePersonAS.java new file mode 100644 index 000000000..b5333c2fb --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/person/CreatePersonAS.java @@ -0,0 +1,213 @@ + +package eu.dnetlib.dhp.actionmanager.person; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.io.Text; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.actionmanager.personentity.ExtractPerson; +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.common.ModelConstants; +import eu.dnetlib.dhp.schema.oaf.Person; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.utils.DHPUtils; + +public class CreatePersonAS { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + private static final Logger log = LoggerFactory + .getLogger(CreatePersonAS.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(CreatePersonAS.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(CreatePersonAS.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.codegen.wholeStage", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(CreatePersonAS.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + void testAuthors() throws Exception { + + String inputPath = getClass() + .getResource( + "/eu/dnetlib/dhp/actionmanager/person/") + .getPath(); + +// spark +// .read() +// .parquet(inputPath + "Authors") +// .as(Encoders.bean(Author.class)) +// .filter((FilterFunction) a -> Optional.ofNullable(a.getOtherNames()).isPresent() && +// Optional.ofNullable(a.getBiography()).isPresent()) +// .write() +// .mode(SaveMode.Overwrite) +// .parquet(workingDir.toString() + "AuthorsSubset"); + + ExtractPerson + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-inputPath", + inputPath, + "-outputPath", + workingDir.toString() + "/actionSet1", + "-workingDir", + workingDir.toString() + "/working" + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD relations = sc + .sequenceFile(workingDir.toString() + "/actionSet1", Text.class, Text.class) + .filter(v -> "eu.dnetlib.dhp.schema.oaf.Relation".equalsIgnoreCase(v._1().toString())) + .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) + .map(aa -> ((Relation) aa.getPayload())); +// + JavaRDD people = sc + .sequenceFile(workingDir.toString() + "/actionSet1", Text.class, Text.class) + .filter(v -> "eu.dnetlib.dhp.schema.oaf.Person".equalsIgnoreCase(v._1().toString())) + .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) + .map(aa -> ((Person) aa.getPayload())); +// + Assertions.assertEquals(7, people.count()); + Assertions + .assertEquals( + "Paulo", + people + .filter( + p -> p.getPid().stream().anyMatch(id -> id.getValue().equalsIgnoreCase("0000-0002-3210-3034"))) + .first() + .getGivenName()); + Assertions + .assertEquals( + "Tavares", + people + .filter( + p -> p.getPid().stream().anyMatch(id -> id.getValue().equalsIgnoreCase("0000-0002-3210-3034"))) + .first() + .getFamilyName()); + Assertions + .assertEquals( + 4, + people + .filter( + p -> p.getPid().stream().anyMatch(id -> id.getValue().equalsIgnoreCase("0000-0002-3210-3034"))) + .first() + .getAlternativeNames() + .size()); + Assertions + .assertEquals( + 4, + people + .filter( + p -> p.getPid().stream().anyMatch(id -> id.getValue().equalsIgnoreCase("0000-0002-3210-3034"))) + .first() + .getPid() + .size()); + Assertions + .assertTrue( + people + .filter( + p -> p.getPid().stream().anyMatch(id -> id.getValue().equalsIgnoreCase("0000-0002-3210-3034"))) + .first() + .getPid() + .stream() + .anyMatch( + p -> p.getQualifier().getSchemename().equalsIgnoreCase("Scopus Author ID") + && p.getValue().equalsIgnoreCase("15119405200"))); + + Assertions + .assertEquals( + 16, + relations + .filter(r -> r.getRelClass().equalsIgnoreCase(ModelConstants.RESULT_PERSON_HASAUTHORED)) + .count()); + Assertions + .assertEquals( + 14, + relations + .filter(r -> r.getRelClass().equalsIgnoreCase(ModelConstants.PERSON_PERSON_HASCOAUTHORED)) + .count()); + Assertions + .assertEquals( + 3, + relations + .filter( + r -> r.getSource().equalsIgnoreCase("30|orcid_______::" + DHPUtils.md5("0000-0001-6291-9619")) + && r.getRelClass().equalsIgnoreCase(ModelConstants.RESULT_PERSON_HASAUTHORED)) + .count()); + Assertions + .assertEquals( + 2, + relations + .filter( + r -> r.getSource().equalsIgnoreCase("30|orcid_______::" + DHPUtils.md5("0000-0001-6291-9619")) + && r.getRelClass().equalsIgnoreCase(ModelConstants.RESULT_PERSON_HASAUTHORED) + && r.getTarget().startsWith("50|doi")) + .count()); + Assertions + .assertEquals( + 1, + relations + .filter( + r -> r.getSource().equalsIgnoreCase("30|orcid_______::" + DHPUtils.md5("0000-0001-6291-9619")) + && r.getRelClass().equalsIgnoreCase(ModelConstants.RESULT_PERSON_HASAUTHORED) + && r.getTarget().startsWith("50|arXiv")) + .count()); + + Assertions + .assertEquals( + 1, + relations + .filter( + r -> r.getSource().equalsIgnoreCase("30|orcid_______::" + DHPUtils.md5("0000-0001-6291-9619")) + && r.getRelClass().equalsIgnoreCase(ModelConstants.PERSON_PERSON_HASCOAUTHORED)) + .count()); + Assertions.assertEquals(33, relations.count()); + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/webcrawl/CreateASTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/webcrawl/CreateASTest.java index e9291f93c..d23b7faa2 100644 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/webcrawl/CreateASTest.java +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/webcrawl/CreateASTest.java @@ -2,6 +2,7 @@ package eu.dnetlib.dhp.actionmanager.webcrawl; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; import java.nio.file.Files; @@ -101,7 +102,10 @@ public class CreateASTest { .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) .map(aa -> ((Relation) aa.getPayload())); - Assertions.assertEquals(58, tmp.count()); + tmp.foreach(r -> System.out.println(new ObjectMapper().writeValueAsString(r))); + tmp.foreach(r -> assertTrue(r.getSource().startsWith("20|ror") || r.getSource().startsWith("50|doi"))); + tmp.foreach(r -> assertTrue(r.getTarget().startsWith("20|ror") || r.getTarget().startsWith("50|doi"))); + Assertions.assertEquals(24, tmp.count()); } @@ -112,7 +116,7 @@ public class CreateASTest { String inputPath = getClass() .getResource( - "/eu/dnetlib/dhp/actionmanager/webcrawl/") + "/eu/dnetlib/dhp/actionmanager/webcrawl/input/") .getPath(); String blackListPath = getClass() .getResource( @@ -194,7 +198,7 @@ public class CreateASTest { Assertions .assertEquals( - 2, tmp + 1, tmp .filter( r -> r .getSource() @@ -207,7 +211,7 @@ public class CreateASTest { Assertions .assertEquals( - 2, tmp + 1, tmp .filter( r -> r .getTarget() @@ -228,13 +232,13 @@ public class CreateASTest { "20|ror_________::" + IdentifierFactory .md5( PidCleaner - .normalizePidValue(PidType.doi.toString(), "https://ror.org/03265fv13"))) + .normalizePidValue("ROR", "https://ror.org/03265fv13"))) && r.getSource().startsWith("50|doi")) .count()); Assertions .assertEquals( - 1, tmp + 0, tmp .filter( r -> r .getTarget() @@ -268,6 +272,10 @@ public class CreateASTest { .getResource( "/eu/dnetlib/dhp/actionmanager/webcrawl") .getPath(); + String blackListPath = getClass() + .getResource( + "/eu/dnetlib/dhp/actionmanager/webcrawl/blackList/") + .getPath(); CreateActionSetFromWebEntries .main( @@ -277,7 +285,8 @@ public class CreateASTest { "-sourcePath", inputPath, "-outputPath", - workingDir.toString() + "/actionSet1" + workingDir.toString() + "/actionSet1", + "-blackListPath", blackListPath }); final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/webcrawl/RemoveFromASTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/webcrawl/RemoveFromASTest.java new file mode 100644 index 000000000..bc78804f2 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/actionmanager/webcrawl/RemoveFromASTest.java @@ -0,0 +1,108 @@ + +package eu.dnetlib.dhp.actionmanager.webcrawl; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.io.Text; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.fasterxml.jackson.databind.ObjectMapper; + +import eu.dnetlib.dhp.schema.action.AtomicAction; +import eu.dnetlib.dhp.schema.oaf.Relation; +import eu.dnetlib.dhp.schema.oaf.utils.IdentifierFactory; +import eu.dnetlib.dhp.schema.oaf.utils.PidCleaner; +import eu.dnetlib.dhp.schema.oaf.utils.PidType; + +/** + * @author miriam.baglioni + * @Date 22/04/24 + */ +public class RemoveFromASTest { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private static SparkSession spark; + + private static Path workingDir; + private static final Logger log = LoggerFactory + .getLogger(RemoveFromASTest.class); + + @BeforeAll + public static void beforeAll() throws IOException { + workingDir = Files + .createTempDirectory(RemoveFromASTest.class.getSimpleName()); + log.info("using work dir {}", workingDir); + + SparkConf conf = new SparkConf(); + conf.setAppName(RemoveFromASTest.class.getSimpleName()); + + conf.setMaster("local[*]"); + conf.set("spark.driver.host", "localhost"); + conf.set("hive.metastore.local", "true"); + conf.set("spark.ui.enabled", "false"); + conf.set("spark.sql.warehouse.dir", workingDir.toString()); + conf.set("hive.metastore.warehouse.dir", workingDir.resolve("warehouse").toString()); + + spark = SparkSession + .builder() + .appName(RemoveFromASTest.class.getSimpleName()) + .config(conf) + .getOrCreate(); + } + + @AfterAll + public static void afterAll() throws IOException { + FileUtils.deleteDirectory(workingDir.toFile()); + spark.stop(); + } + + @Test + void testNumberofRelations() throws Exception { + + String inputPath = getClass() + .getResource( + "/eu/dnetlib/dhp/actionmanager/webcrawl/actionSet/") + .getPath(); + String blackListPath = getClass() + .getResource( + "/eu/dnetlib/dhp/actionmanager/webcrawl/blackListRemove/") + .getPath(); + + RemoveRelationFromActionSet + .main( + new String[] { + "-isSparkSessionManaged", + Boolean.FALSE.toString(), + "-sourcePath", + inputPath, + "-outputPath", + workingDir.toString() + "/actionSet1", + "-blackListPath", blackListPath + }); + + final JavaSparkContext sc = new JavaSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .sequenceFile(workingDir.toString() + "/actionSet1", Text.class, Text.class) + .map(value -> OBJECT_MAPPER.readValue(value._2().toString(), AtomicAction.class)) + .map(aa -> ((Relation) aa.getPayload())); + + Assertions.assertEquals(22, tmp.count()); + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/file/FileGZipMultipleNodeTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/file/FileGZipMultipleNodeTest.java new file mode 100644 index 000000000..2b5e90ab2 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/file/FileGZipMultipleNodeTest.java @@ -0,0 +1,64 @@ + +package eu.dnetlib.dhp.collection.plugin.file; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Objects; +import java.util.stream.Stream; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.junit.jupiter.api.*; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.junit.jupiter.MockitoExtension; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.collection.ApiDescriptor; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; + +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +@ExtendWith(MockitoExtension.class) +public class FileGZipMultipleNodeTest { + + private static final Logger log = LoggerFactory.getLogger(FileGZipCollectorPluginTest.class); + + private final ApiDescriptor api = new ApiDescriptor(); + + private FileGZipCollectorPlugin plugin; + + private static final String SPLIT_ON_ELEMENT = "incollection,article"; + + @BeforeEach + public void setUp() throws IOException { + + final String gzipFile = Objects + .requireNonNull( + this + .getClass() + .getResource("/eu/dnetlib/dhp/collection/plugin/file/dblp.gz")) + .getFile(); + + api.setBaseUrl(gzipFile); + + HashMap params = new HashMap<>(); + params.put("splitOnElement", SPLIT_ON_ELEMENT); + + api.setParams(params); + + FileSystem fs = FileSystem.get(new Configuration()); + plugin = new FileGZipCollectorPlugin(fs); + } + + @Test + void test() throws CollectorException { + + final Stream stream = plugin.collect(api, new AggregatorReport()); + + stream.limit(10).forEach(s -> { + Assertions.assertTrue(s.length() > 0); + log.info(s); + }); + } +} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIteratorTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIteratorTest.java new file mode 100644 index 000000000..117d7b94f --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/gtr2/Gtr2PublicationsIteratorTest.java @@ -0,0 +1,103 @@ + +package eu.dnetlib.dhp.collection.plugin.gtr2; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.util.Iterator; + +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import eu.dnetlib.dhp.common.collection.HttpClientParams; + +class Gtr2PublicationsIteratorTest { + + private static final String baseURL = "https://gtr.ukri.org/gtr/api"; + + private static final HttpClientParams clientParams = new HttpClientParams(); + + @Test + @Disabled + public void testOne() throws Exception { + System.out.println("one publication"); + + final Iterator iterator = new Gtr2PublicationsIterator(baseURL, null, null, null, clientParams); + + if (iterator.hasNext()) { + final String res = iterator.next(); + assertNotNull(res); + System.out.println(res); + } + } + + @Test + @Disabled + public void testPaging() throws Exception { + final Iterator iterator = new Gtr2PublicationsIterator(baseURL, null, "2", "2", clientParams); + + while (iterator.hasNext()) { + Thread.sleep(300); + final String res = iterator.next(); + assertNotNull(res); + System.out.println(res); + } + } + + @Test + @Disabled + public void testOnePage() throws Exception { + final Iterator iterator = new Gtr2PublicationsIterator(baseURL, null, "12", "12", clientParams); + final int count = iterateAndCount(iterator); + assertEquals(20, count); + } + + @Test + @Disabled + public void testIncrementalHarvestingNoRecords() throws Exception { + System.out.println("incremental Harvesting"); + final Iterator iterator = new Gtr2PublicationsIterator(baseURL, "2050-12-12T", "11", "13", + clientParams); + final int count = iterateAndCount(iterator); + assertEquals(0, count); + } + + @Test + @Disabled + public void testIncrementalHarvesting() throws Exception { + System.out.println("incremental Harvesting"); + final Iterator iterator = new Gtr2PublicationsIterator(baseURL, "2016-11-30", "11", "11", clientParams); + final int count = iterateAndCount(iterator); + assertEquals(20, count); + } + + @Test + @Disabled + public void testCompleteHarvesting() throws Exception { + System.out.println("testing complete harvesting"); + final Iterator iterator = new Gtr2PublicationsIterator(baseURL, null, null, null, clientParams); + // TryIndentXmlString indenter = new TryIndentXmlString(); + // it.setEndAtPage(3); + + while (iterator.hasNext()) { + final String res = iterator.next(); + assertNotNull(res); + // System.out.println(res); + // Scanner keyboard = new Scanner(System.in); + // System.out.println("press enter for next record"); + // keyboard.nextLine(); + + } + } + + private int iterateAndCount(final Iterator iterator) throws Exception { + int i = 0; + while (iterator.hasNext()) { + assertNotNull(iterator.next()); + i++; + } + System.out.println("Got " + i + " publications"); + return i; + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPluginTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPluginTest.java new file mode 100644 index 000000000..664b84d5a --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/osf/OsfPreprintsCollectorPluginTest.java @@ -0,0 +1,122 @@ + +package eu.dnetlib.dhp.collection.plugin.osf; + +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +import java.util.HashMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Stream; + +import org.dom4j.DocumentHelper; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import eu.dnetlib.dhp.collection.ApiDescriptor; +import eu.dnetlib.dhp.collection.plugin.utils.JsonUtils; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; +import eu.dnetlib.dhp.common.collection.HttpClientParams; +import eu.dnetlib.dhp.common.collection.HttpConnector2; + +public class OsfPreprintsCollectorPluginTest { + + private static final Logger log = LoggerFactory.getLogger(OsfPreprintsCollectorPlugin.class); + + private final String baseUrl = "https://api.osf.io/v2/preprints/"; + + private final int pageSize = 100; + + private final ApiDescriptor api = new ApiDescriptor(); + + private OsfPreprintsCollectorPlugin plugin; + + @BeforeEach + public void setUp() { + final HashMap params = new HashMap<>(); + params.put("pageSize", "" + this.pageSize); + + this.api.setBaseUrl(this.baseUrl); + this.api.setParams(params); + + this.plugin = new OsfPreprintsCollectorPlugin(new HttpClientParams()); + } + + @Test + @Disabled + void test_one() throws CollectorException { + this.plugin + .collect(this.api, new AggregatorReport()) + .limit(1) + .forEach(log::info); + } + + @Test + @Disabled + void test_limited() throws CollectorException { + final AtomicInteger i = new AtomicInteger(0); + final Stream stream = this.plugin.collect(this.api, new AggregatorReport()); + + stream.limit(2000).forEach(s -> { + Assertions.assertTrue(s.length() > 0); + i.incrementAndGet(); + log.info(s); + }); + + log.info("{}", i.intValue()); + Assertions.assertTrue(i.intValue() > 0); + } + + @Test + @Disabled + void test_all() throws CollectorException { + final AtomicLong i = new AtomicLong(0); + final Stream stream = this.plugin.collect(this.api, new AggregatorReport()); + + stream.forEach(s -> { + Assertions.assertTrue(s.length() > 0); + if ((i.incrementAndGet() % 1000) == 0) { + log.info("COLLECTED: {}", i.get()); + } + + }); + + log.info("TOTAL: {}", i.get()); + Assertions.assertTrue(i.get() > 0); + } + + @Test + @Disabled + void test_authentication_required() { + final HttpConnector2 connector = new HttpConnector2(); + + try { + final String res = connector + .getInputSource("https://api.osf.io/v2/preprints/ydtzx/contributors/?format=json"); + System.out.println(res); + fail(); + } catch (final Throwable e) { + + System.out.println("**** ERROR: " + e.getMessage()); + + if ((e instanceof CollectorException) && e.getMessage().contains("401")) { + System.out.println(" XML: " + DocumentHelper.createDocument().getRootElement().detach()); + } + + assertTrue(e.getMessage().contains("401")); + } + + } + + @Test + void testXML() { + final String xml = JsonUtils.convertToXML("{'next':null}"); + System.out.println(xml); + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiCollectorPluginTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiCollectorPluginTest.java new file mode 100644 index 000000000..3f715c2eb --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/researchfi/ResearchFiCollectorPluginTest.java @@ -0,0 +1,58 @@ + +package eu.dnetlib.dhp.collection.plugin.researchfi; + +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; + +import org.dom4j.DocumentException; +import org.dom4j.DocumentHelper; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; + +import eu.dnetlib.dhp.collection.ApiDescriptor; +import eu.dnetlib.dhp.common.aggregation.AggregatorReport; +import eu.dnetlib.dhp.common.collection.CollectorException; + +public class ResearchFiCollectorPluginTest { + + private final ResearchFiCollectorPlugin plugin = new ResearchFiCollectorPlugin(); + + @Test + @Disabled + void testCollect() throws CollectorException { + final ApiDescriptor api = new ApiDescriptor(); + api.setBaseUrl("https://research.fi/api/rest/v1/funding-decisions?FunderName=AKA&FundingStartYearFrom=2022"); + api.setProtocol("research_fi"); + api + .getParams() + .put("auth_url", "https://researchfi-auth.2.rahtiapp.fi/realms/publicapi/protocol/openid-connect/token"); + api.getParams().put("auth_client_id", ""); + api.getParams().put("auth_client_secret", ""); + + final AtomicLong count = new AtomicLong(0); + final Set ids = new HashSet<>(); + + this.plugin.collect(api, new AggregatorReport()).forEach(s -> { + + if (count.getAndIncrement() == 0) { + System.out.println("First: " + s); + } + + try { + final String id = DocumentHelper.parseText(s).valueOf("/recordWrap/funderProjectNumber"); + if (ids.contains(id)) { + System.out.println("Id already present: " + id); + } + ids.add(id); + } catch (final DocumentException e) { + throw new RuntimeException(e); + } + }); + + System.out.println("Total records: " + count); + System.out.println("Total identifiers: " + ids.size()); + + } + +} diff --git a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/OsfPreprintCollectorTest.java b/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/OsfPreprintCollectorTest.java deleted file mode 100644 index 0e64f8bab..000000000 --- a/dhp-workflows/dhp-aggregation/src/test/java/eu/dnetlib/dhp/collection/plugin/rest/OsfPreprintCollectorTest.java +++ /dev/null @@ -1,105 +0,0 @@ - -package eu.dnetlib.dhp.collection.plugin.rest; - -import java.util.HashMap; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; -import java.util.stream.Stream; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import eu.dnetlib.dhp.collection.ApiDescriptor; -import eu.dnetlib.dhp.common.aggregation.AggregatorReport; -import eu.dnetlib.dhp.common.collection.CollectorException; -import eu.dnetlib.dhp.common.collection.HttpClientParams; - -public class OsfPreprintCollectorTest { - - private static final Logger log = LoggerFactory.getLogger(OsfPreprintCollectorTest.class); - - private final String baseUrl = "https://api.osf.io/v2/preprints/"; - - // private final String requestHeaderMap = ""; - // private final String authMethod = ""; - // private final String authToken = ""; - // private final String resultOutputFormat = ""; - - private final String queryParams = "filter:is_published:d=true"; - - private final String entityXpath = "/*/*[local-name()='data']"; - - private final String resultTotalXpath = "/*/*[local-name()='links']/*[local-name()='meta']/*[local-name()='total']"; - - private final String resumptionParam = "page"; - private final String resumptionType = "page"; - private final String resumptionXpath = "/*/*[local-name()='links']/*[local-name()='next']"; - - private final String resultSizeParam = "page[size]"; - private final String resultSizeValue = "100"; - - private final String resultFormatParam = "format"; - private final String resultFormatValue = "json"; - - private final ApiDescriptor api = new ApiDescriptor(); - private RestCollectorPlugin rcp; - - @BeforeEach - public void setUp() { - final HashMap params = new HashMap<>(); - params.put("resumptionType", this.resumptionType); - params.put("resumptionParam", this.resumptionParam); - params.put("resumptionXpath", this.resumptionXpath); - params.put("resultTotalXpath", this.resultTotalXpath); - params.put("resultFormatParam", this.resultFormatParam); - params.put("resultFormatValue", this.resultFormatValue); - params.put("resultSizeParam", this.resultSizeParam); - params.put("resultSizeValue", this.resultSizeValue); - params.put("queryParams", this.queryParams); - params.put("entityXpath", this.entityXpath); - - this.api.setBaseUrl(this.baseUrl); - this.api.setParams(params); - - this.rcp = new RestCollectorPlugin(new HttpClientParams()); - } - - @Test - @Disabled - void test_limited() throws CollectorException { - final AtomicInteger i = new AtomicInteger(0); - final Stream stream = this.rcp.collect(this.api, new AggregatorReport()); - - stream.limit(2000).forEach(s -> { - Assertions.assertTrue(s.length() > 0); - i.incrementAndGet(); - log.info(s); - }); - - log.info("{}", i.intValue()); - Assertions.assertTrue(i.intValue() > 0); - } - - @Test - @Disabled - void test_all() throws CollectorException { - final AtomicLong i = new AtomicLong(0); - final Stream stream = this.rcp.collect(this.api, new AggregatorReport()); - - stream.forEach(s -> { - Assertions.assertTrue(s.length() > 0); - if ((i.incrementAndGet() % 1000) == 0) { - log.info("COLLECTED: {}", i.get()); - } - - }); - - log.info("TOTAL: {}", i.get()); - Assertions.assertTrue(i.get() > 0); - } - -} diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/person/WorkJson/part-00000 b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/person/WorkJson/part-00000 new file mode 100644 index 000000000..636595d49 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/person/WorkJson/part-00000 @@ -0,0 +1,10 @@ +{"orcid":"0000-0001-6291-9619","title":"A Visible Light Driven Photoelectrochemical Chloramphenicol Aptasensor Based on a Gold Nanoparticle-Functionalized 3D Flower-like MoS2/TiO2 Heterostructure","pids":[{"value":"10.1021/acs.langmuir.1c02956","schema":"doi"},{"value":"2-s2.0-85124885368","schema":"eid"},{"value":"15205827 07437463","schema":"issn"}]} +{"orcid":"0000-0002-3210-3034","title":"A Visible Light Driven Photoelectrochemical Chloramphenicol Aptasensor Based on a Gold Nanoparticle-Functionalized 3D Flower-like MoS2/TiO2 Heterostructure","pids":[{"value":"10.1021/acs.langmuir.1c02956","schema":"doi"},{"value":"2-s2.0-85124885368","schema":"eid"},{"value":"15205827 07437463","schema":"issn"}]} +{"orcid":"0000-0001-6291-9619","title":"Study of High-Transverse-Momentum Higgs Boson Production in Association with a Vector Boson in the qqbb Final State with the ATLAS Detector","pids":[{"value":"2736741","schema":"other-id"},{"value":"10.1103/PhysRevLett.132.131802","schema":"doi"},{"value":"2312.07605","schema":"arxiv"}]} +{"orcid":"0000-0002-3210-3034","title":"Study of High-Transverse-Momentum Higgs Boson Production in Association with a Vector Boson in the qqbb Final State with the ATLAS Detector","pids":[{"value":"2736741","schema":"other-id"},{"value":"10.1103/PhysRevLett.132.131802","schema":"doi"},{"value":"2312.07605","schema":"arxiv"}]} +{"orcid":"0000-0002-9030-7609","title":"Search for supersymmetry in a final state containing two photons and missing transverse momentum in √s = 13 TeV pp collisions at the LHC using the ATLAS detector","pids":[{"value":"10.1140/epjc/s10052-016-4344-x","schema":"doi"},{"value":"2-s2.0-84988710988","schema":"eid"},{"value":"14346052 14346044","schema":"issn"}]} +{"orcid":"0000-0003-2552-9691","title":"Search for supersymmetry in a final state containing two photons and missing transverse momentum in $\\sqrt{s}$ = 13 TeV $pp$ collisions at the LHC using the ATLAS detector","pids":[{"value":"1473744","schema":"other-id"},{"value":"10.1140/epjc/s10052-016-4344-x","schema":"doi"},{"value":"1606.09150","schema":"arxiv"}]} +{"orcid":"0000-0003-0305-8980","title":"Search for supersymmetry in a final state containing two photons and missing transverse momentum in √s = 13 TeV pp collisions at the LHC using the ATLAS detector","pids":[{"value":"10.1140/epjc/s10052-016-4344-x","schema":"doi"},{"value":"2-s2.0-84988710988","schema":"eid"}]} +{"orcid":"0000-0002-9030-7609","title":"Measurement of the energy response of the ATLAS calorimeter to charged pions from $W^{\\pm }\\rightarrow \\tau ^{\\pm }(\\rightarrow \\pi ^{\\pm }\\nu _{\\tau })\\nu _{\\tau }$ events in Run 2 data","pids":[{"value":"1909507","schema":"other-id"},{"value":"10.1140/epjc/s10052-022-10117-2","schema":"doi"},{"value":"2108.09043","schema":"arxiv"}]} +{"orcid":"0000-0003-2629-4046","title":"Measurement of the energy response of the ATLAS calorimeter to charged pions from $W^{\\pm }\\rightarrow \\tau ^{\\pm }(\\rightarrow \\pi ^{\\pm }\\nu _{\\tau })\\nu _{\\tau }$ events in Run 2 data","pids":[{"value":"1909507","schema":"other-id"},{"value":"10.1140/epjc/s10052-022-10117-2","schema":"doi"},{"value":"2108.09043","schema":"arxiv"}]} +{"orcid":"0000-0001-8582-8912","title":"Measurement of the energy response of the ATLAS calorimeter to charged pions from $W^{\\pm }\\rightarrow \\tau ^{\\pm }(\\rightarrow \\pi ^{\\pm }\\nu _{\\tau })\\nu _{\\tau }$ events in Run 2 data","pids":[{"value":"1909507","schema":"other-id"},{"value":"10.1140/epjc/s10052-022-10117-2","schema":"doi"},{"value":"2108.09043","schema":"arxiv"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/blackListRemove/not_irish.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/blackListRemove/not_irish.json new file mode 100644 index 000000000..2c470c555 --- /dev/null +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/actionmanager/webcrawl/blackListRemove/not_irish.json @@ -0,0 +1 @@ +{"doi":"https://doi.org/10.1098/rstl.1684.0023","OpenAlexId":"https://openalex.org/W2124362779"} \ No newline at end of file diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/crossref/issn_pub.json b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/crossref/issn_pub.json index 2a9e391df..2f1af2a6e 100644 --- a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/crossref/issn_pub.json +++ b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/crossref/issn_pub.json @@ -789,10 +789,6 @@ "value": "2227-9717", "type": "electronic" }, - { - "value": "VALUE", - "type": "PIPPO" - }, { "value": "1063-4584", "type": "pu" diff --git a/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/plugin/file/dblp.gz b/dhp-workflows/dhp-aggregation/src/test/resources/eu/dnetlib/dhp/collection/plugin/file/dblp.gz new file mode 100644 index 0000000000000000000000000000000000000000..979bcbed2845c7c7995db4f1fdc10069f89f1bcb GIT binary patch literal 1097 zcmV-P1h)GhiwFotJWOT)17u=sZ~)DhT~pgQ6hPnmE41#+&h8AhtbCBTYutfADHIAr zQrJH2$hKmu$dX5vv(5hcy|P0?0RyFF1|EVfTUW|CS652-_G=-~g6WDYx#bYgcMy|_ zO1aFooP+6!J01@MmpE^uSK)8FCzILd(>;{NqD0fr)7joJa_o(ln$&?e>~-`C!C%NB ziV!R@5EvCHHEhf2`DEn!BbW3Xbjg-mj%IVFVLXNx+|B_1O$Kr5pt6pIS(eZR9xfB1 zN|ti!w4s~uh8w}6Nv#(wm8`0~5VuY1S4x_(Dk8%m#O>w!(UMUe!E6R0ZZ6kPVx=xu zu~kvlhG}%(Ol<>gqqD2&qWzkuEX3=#*AW zWr~^xQ75*c)8m*7-7$H| zE{8VLo+NTb`-v(aUQ(Pteo0MhJ*PnBL~|w$ZN_@fE2A<^3-re>uM*7+L+5SHKx&Tm z_7DE-^fnrk7win0d%dS>YsfyEwBqs3w#yPIQVV99P$?muI(_svU$zc1{s|q$^Vqmh zGW(%oID@t0&MGdE?b=xh4+;?B?$C;*=!j>zxmTR!6!0{HE{|AKlFcPRX#YAvaV&R7MCk{sxlN*jdH*&%!!>>R`AluC4{DzMnaHaL~bR-LW#Oy(b-|pABK2!w<5m6`y|-E zj`cOb5RuBPhHOXgR4r4$4~y314B&}oW1_;6N@`Kxb^dz>=7+Ud-sb&{>Nx0sPqkae zgS-K@J)!k)@P1E)7!B@VdrFPw$tBwM(7A`~Yn_at7{FJsfZ5kl!{ON$!-EvIdp_q$ zyX8aa+Gmn!K)fw77Hi5AG+kDP75A~8u_TueLs-}z`t)Hsfy3aYztQ0SB)I{LAE3;o zs(`(Uuf{|ZxX|opl6~+)bnnUL&EOYiGt+!QV=lN^ZUB-L(wJTydnYg=9ITpSFr1>U zlDdM@qBLLv1xHjv_OI52ynOa7|CY(aAh;uwTlU*^R#&Fe#Aa}->?EmHMa1J`c82vA zi0zf>Jj8ADB4Pi%Y84Mrk>zF4wWdcE;ZecsW)1v(A=+svAi;D}i!(+ki z*|*G>w)|KZF_fpOQFx>>NZwQ39F7ZaxB4zW%?>P7hCZ@?E5VGh$$Z println(mapper.writeValueAsString(s))) } @@ -91,9 +90,10 @@ class BioScholixTest extends AbstractVocabularyTest { @Test def testParsingPubmedXML(): Unit = { - val xml = new XMLEventReader( - Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml")) - ) + val inputFactory = XMLInputFactory.newInstance + + val xml = inputFactory.createXMLEventReader(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml")) + val parser = new PMParser(xml) parser.foreach(checkPMArticle) } @@ -156,9 +156,9 @@ class BioScholixTest extends AbstractVocabularyTest { @Test def testPubmedMapping(): Unit = { - val xml = new XMLEventReader( - Source.fromInputStream(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml")) - ) + val inputFactory = XMLInputFactory.newInstance + val xml = inputFactory.createXMLEventReader(getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/graph/bio/pubmed.xml")) + val parser = new PMParser(xml) val results = ListBuffer[Oaf]() parser.foreach(x => results += PubMedToOaf.convert(x, vocabularies)) diff --git a/dhp-workflows/dhp-dedup-openaire/pom.xml b/dhp-workflows/dhp-dedup-openaire/pom.xml index 8665ebd05..897fa1a76 100644 --- a/dhp-workflows/dhp-dedup-openaire/pom.xml +++ b/dhp-workflows/dhp-dedup-openaire/pom.xml @@ -53,24 +53,10 @@ dhp-pace-core ${project.version} - org.apache.commons commons-lang3 - - - org.scala-lang.modules - scala-java8-compat_${scala.binary.version} - 1.0.2 - - - - org.scala-lang.modules - scala-collection-compat_${scala.binary.version} - 2.11.0 - - org.apache.spark spark-core_${scala.binary.version} @@ -79,16 +65,10 @@ org.apache.spark spark-sql_${scala.binary.version} - org.apache.spark spark-graphx_${scala.binary.version} - - - com.arakelian - java-jq - dom4j dom4j @@ -101,10 +81,6 @@ com.fasterxml.jackson.core jackson-databind - - com.fasterxml.jackson.core - jackson-core - org.apache.httpcomponents httpclient diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java index 3db2efb74..0ff40f6f8 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkCreateMergeRels.java @@ -42,6 +42,7 @@ import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.pace.config.DedupConfig; +import eu.dnetlib.pace.util.SparkCompatUtils; import scala.Tuple3; import scala.collection.JavaConversions; @@ -148,8 +149,7 @@ public class SparkCreateMergeRels extends AbstractSparkAction { Dataset pivotHistory = spark .createDataset( Collections.emptyList(), - RowEncoder - .apply(StructType.fromDDL("id STRING, lastUsage STRING"))); + SparkCompatUtils.encoderFor(StructType.fromDDL("id STRING, lastUsage STRING"))); if (StringUtils.isNotBlank(pivotHistoryDatabase)) { pivotHistory = spark diff --git a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java index e4bcf1e82..c7efce4d7 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java +++ b/dhp-workflows/dhp-dedup-openaire/src/main/java/eu/dnetlib/dhp/oa/dedup/SparkPropagateRelation.java @@ -8,7 +8,6 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.ReduceFunction; import org.apache.spark.sql.*; -import org.apache.spark.sql.catalyst.encoders.RowEncoder; import org.apache.spark.sql.types.StructType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -23,6 +22,7 @@ import eu.dnetlib.dhp.schema.oaf.Relation; import eu.dnetlib.dhp.schema.oaf.utils.MergeUtils; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; +import eu.dnetlib.pace.util.SparkCompatUtils; import scala.Tuple2; import scala.Tuple3; @@ -145,7 +145,7 @@ public class SparkPropagateRelation extends AbstractSparkAction { StructType idsSchema = StructType .fromDDL("`id` STRING, `dataInfo` STRUCT<`deletedbyinference`:BOOLEAN,`invisible`:BOOLEAN>"); - Dataset allIds = spark.emptyDataset(RowEncoder.apply(idsSchema)); + Dataset allIds = spark.emptyDataset(SparkCompatUtils.encoderFor(idsSchema)); for (EntityType entityType : ModelSupport.entityTypes.keySet()) { String entityPath = graphBasePath + '/' + entityType.name(); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DecisionTreeTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DecisionTreeTest.java new file mode 100644 index 000000000..6acc65e05 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/DecisionTreeTest.java @@ -0,0 +1,85 @@ + +package eu.dnetlib.dhp.oa.dedup; + +import java.io.IOException; + +import org.apache.commons.io.IOUtils; +import org.apache.spark.sql.Row; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.platform.commons.util.StringUtils; + +import eu.dnetlib.dhp.oa.dedup.SparkOpenorgsDedupTest; +import eu.dnetlib.pace.config.DedupConfig; +import eu.dnetlib.pace.model.SparkModel; +import eu.dnetlib.pace.tree.support.TreeProcessor; +import eu.dnetlib.pace.tree.support.TreeStats; + +class DecisionTreeTest { + + @Test + void testJPath() throws IOException { + + DedupConfig conf = DedupConfig + .load(IOUtils.toString(getClass().getResourceAsStream("dedup_conf_organization.json"))); + + final String org = IOUtils.toString(getClass().getResourceAsStream("organization.json")); + + Row row = SparkModel.apply(conf).rowFromJson(org); + + System.out.println("row = " + row); + Assertions.assertNotNull(row); + Assertions.assertTrue(StringUtils.isNotBlank(row.getAs("identifier"))); + + System.out.println("row = " + row.getAs("countrytitle")); + } + + @Test + void jsonToModelTest() throws IOException { + DedupConfig conf = DedupConfig + .load( + IOUtils + .toString( + SparkOpenorgsDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); + + final String org = IOUtils.toString(getClass().getResourceAsStream("organization_example1.json")); + + Row row = SparkModel.apply(conf).rowFromJson(org); + // to check that the same parsing returns the same row + Row row1 = SparkModel.apply(conf).rowFromJson(org); + + Assertions.assertEquals(row, row1); + System.out.println("row = " + row); + Assertions.assertNotNull(row); + Assertions.assertTrue(StringUtils.isNotBlank(row.getAs("identifier"))); + } + + @Test + void organizationDecisionTreeTest() throws Exception { + DedupConfig conf = DedupConfig + .load( + IOUtils + .toString( + SparkOpenorgsDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); + + final String org1 = "{\"eclegalbody\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"ecresearchorganization\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"legalname\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"SUSF - Universit\\\\u00e9 internationale de floride\"}, \"pid\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"qualifier\": {\"classid\": \"grid\", \"classname\": \"grid\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"grid.65456.34\"}], \"websiteurl\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"http://www.fiu.edu/\"}, \"ecnutscode\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"logourl\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"collectedfrom\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"GRID - Global Research Identifier Database\", \"key\": \"10|openaire____::ff4a008470319a22d9cf3d14af485977\"}], \"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"UNKNOWN\", \"classname\": \"UNKNOWN\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"dedup-similarity-organization-simple\", \"invisible\": false, \"trust\": \"0.89\"}, \"alternativeNames\": [], \"echighereducation\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"id\": \"20|grid________::f22e08fb7bd544b4355f99bef2c43ad5\", \"eclegalperson\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"lastupdatetimestamp\": 1566902405602, \"ecinternationalorganizationeurinterests\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"dateofcollection\": \"\", \"dateoftransformation\": \"\", \"ecnonprofit\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"ecenterprise\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"ecinternationalorganization\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"legalshortname\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"FIU\"}, \"country\": {\"classid\": \"US\", \"classname\": \"United States\", \"schemename\": \"dnet:countries\", \"schemeid\": \"dnet:countries\"}, \"extraInfo\": [], \"originalId\": [], \"ecsmevalidated\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}}"; + final String org2 = "{\"eclegalbody\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"ecresearchorganization\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"legalname\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"SUSF - Universidad Internacional de Florida\"}, \"pid\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"qualifier\": {\"classid\": \"grid\", \"classname\": \"grid\", \"schemename\": \"dnet:pid_types\", \"schemeid\": \"dnet:pid_types\"}, \"value\": \"grid.65456.34\"}], \"websiteurl\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"http://www.fiu.edu/\"}, \"ecnutscode\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"logourl\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"collectedfrom\": [{\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"GRID - Global Research Identifier Database\", \"key\": \"10|openaire____::ff4a008470319a22d9cf3d14af485977\"}], \"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"UNKNOWN\", \"classname\": \"UNKNOWN\", \"schemename\": \"dnet:provenanceActions\", \"schemeid\": \"dnet:provenanceActions\"}, \"inferred\": true, \"inferenceprovenance\": \"dedup-similarity-organization-simple\", \"invisible\": false, \"trust\": \"0.89\"}, \"alternativeNames\": [], \"echighereducation\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"id\": \"20|grid________::2b261e9d8c2a63abbfd5826918c23b6d\", \"eclegalperson\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"lastupdatetimestamp\": 1566902405602, \"ecinternationalorganizationeurinterests\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"dateofcollection\": \"\", \"dateoftransformation\": \"\", \"ecnonprofit\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"ecenterprise\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"ecinternationalorganization\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}, \"legalshortname\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"FIU\"}, \"country\": {\"classid\": \"US\", \"classname\": \"United States\", \"schemename\": \"dnet:countries\", \"schemeid\": \"dnet:countries\"}, \"extraInfo\": [], \"originalId\": [], \"ecsmevalidated\": {\"dataInfo\": {\"deletedbyinference\": false, \"provenanceaction\": {\"classid\": \"\", \"classname\": \"\", \"schemename\": \"\", \"schemeid\": \"\"}, \"inferred\": false, \"inferenceprovenance\": \"\", \"invisible\": false, \"trust\": \"\"}, \"value\": \"\"}}"; + + Row row1 = SparkModel.apply(conf).rowFromJson(org1); + Row row2 = SparkModel.apply(conf).rowFromJson(org2); + + System.out.println("row1 = " + row1); + System.out.println("row2 = " + row2); + TreeProcessor tree = new TreeProcessor(conf); + + boolean result = tree.compare(row1, row2); + + System.out.println("result = " + result); + + } + +} diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java index c80c98bb7..3bfd861f8 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkDedupTest.java @@ -190,7 +190,7 @@ public class SparkDedupTest implements Serializable { System.out.println("orp_simrel = " + orp_simrel); if (CHECK_CARDINALITIES) { - assertEquals(751, orgs_simrel); + assertEquals(742, orgs_simrel); assertEquals(566, pubs_simrel); assertEquals(113, sw_simrel); assertEquals(148, ds_simrel); @@ -251,7 +251,7 @@ public class SparkDedupTest implements Serializable { // entities simrels supposed to be equal to the number of previous step (no rels in whitelist) if (CHECK_CARDINALITIES) { - assertEquals(751, orgs_simrel); + assertEquals(742, orgs_simrel); assertEquals(566, pubs_simrel); assertEquals(148, ds_simrel); assertEquals(280, orp_simrel); @@ -442,7 +442,7 @@ public class SparkDedupTest implements Serializable { final List merges = pubs .filter("source == '50|arXiv_dedup_::c93aeb433eb90ed7a86e29be00791b7c'") .collectAsList(); - assertEquals(3, merges.size()); + assertEquals(1, merges.size()); Set dups = Sets .newHashSet( "50|doi_________::3b1d0d8e8f930826665df9d6b82fbb73", @@ -451,7 +451,7 @@ public class SparkDedupTest implements Serializable { merges.forEach(r -> { assertEquals(ModelConstants.RESULT_RESULT, r.getRelType()); assertEquals(ModelConstants.DEDUP, r.getSubRelType()); - assertEquals(ModelConstants.MERGES, r.getRelClass()); + assertEquals(ModelConstants.IS_MERGED_IN, r.getRelClass()); assertTrue(dups.contains(r.getTarget())); }); @@ -561,7 +561,7 @@ public class SparkDedupTest implements Serializable { System.out.println("orp_mergerel = " + orp_mergerel); if (CHECK_CARDINALITIES) { - assertEquals(1268, orgs_mergerel); + assertEquals(1278, orgs_mergerel); assertEquals(1156, pubs.count()); assertEquals(292, sw_mergerel); assertEquals(476, ds_mergerel); @@ -618,7 +618,7 @@ public class SparkDedupTest implements Serializable { System.out.println("orp_deduprecord = " + orp_deduprecord); if (CHECK_CARDINALITIES) { - assertEquals(86, orgs_deduprecord); + assertEquals(78, orgs_deduprecord); assertEquals(96, pubs.count()); assertEquals(47, sw_deduprecord); assertEquals(97, ds_deduprecord); @@ -761,7 +761,7 @@ public class SparkDedupTest implements Serializable { if (CHECK_CARDINALITIES) { assertEquals(930, publications); - assertEquals(839, organizations); + assertEquals(831, organizations); assertEquals(100, projects); assertEquals(100, datasource); assertEquals(196, softwares); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java index 6f2a6904b..b2b5d824b 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkOpenorgsDedupTest.java @@ -22,8 +22,11 @@ import java.util.Properties; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; +import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; import org.junit.jupiter.api.*; import org.junit.jupiter.api.extension.ExtendWith; @@ -143,7 +146,7 @@ public class SparkOpenorgsDedupTest implements Serializable { .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) .count(); - assertEquals(86, orgs_simrel); + assertEquals(92, orgs_simrel); } @Test @@ -172,7 +175,7 @@ public class SparkOpenorgsDedupTest implements Serializable { .load(DedupUtility.createSimRelPath(testOutputBasePath, testActionSetId, "organization")) .count(); - assertEquals(122, orgs_simrel); + assertEquals(128, orgs_simrel); } @Test @@ -207,7 +210,7 @@ public class SparkOpenorgsDedupTest implements Serializable { .read() .load(testOutputBasePath + "/" + testActionSetId + "/organization_mergerel") .count(); - assertEquals(132, orgs_mergerel); + assertEquals(128, orgs_mergerel); // verify that a DiffRel is in the mergerels (to be sure that the job supposed to remove them has something to // do) diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest2.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest2.java index 9afe1e34b..4fa03462a 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest2.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/SparkPublicationRootsTest2.java @@ -143,7 +143,9 @@ public class SparkPublicationRootsTest2 implements Serializable { "--graphBasePath", graphInputPath, "--actionSetId", testActionSetId, "--isLookUpUrl", "lookupurl", - "--workingPath", workingPath + "--workingPath", workingPath, + "--hiveMetastoreUris", "none", + "--pivotHistoryDatabase", "none" }), spark) .run(isLookUpService); diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java index 705c2cc84..18c9ce18d 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java +++ b/dhp-workflows/dhp-dedup-openaire/src/test/java/eu/dnetlib/dhp/oa/dedup/jpath/JsonPathTest.java @@ -9,6 +9,7 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.platform.commons.util.StringUtils; +import eu.dnetlib.dhp.oa.dedup.SparkOpenorgsDedupTest; import eu.dnetlib.pace.config.DedupConfig; import eu.dnetlib.pace.model.SparkModel; @@ -24,6 +25,31 @@ class JsonPathTest { Row row = SparkModel.apply(conf).rowFromJson(org); + System.out.println("row = " + row); + Assertions.assertNotNull(row); + Assertions.assertTrue(StringUtils.isNotBlank(row.getAs("identifier"))); + + System.out.println("row = " + row.getAs("countrytitle")); + } + + @Test + void jsonToModelTest() throws IOException { + DedupConfig conf = DedupConfig + .load( + IOUtils + .toString( + SparkOpenorgsDedupTest.class + .getResourceAsStream( + "/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); + + final String org = IOUtils.toString(getClass().getResourceAsStream("organization_example1.json")); + + Row row = SparkModel.apply(conf).rowFromJson(org); + // to check that the same parsing returns the same row + Row row1 = SparkModel.apply(conf).rowFromJson(org); + + Assertions.assertEquals(row, row1); + System.out.println("row = " + row); Assertions.assertNotNull(row); Assertions.assertTrue(StringUtils.isNotBlank(row.getAs("identifier"))); } diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json index 917c9426b..f00f6198e 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json @@ -4,8 +4,8 @@ "dedupRun" : "001", "entityType" : "organization", "subEntityValue": "organization", - "orderField" : "legalname", - "queueMaxSize" : "2000", + "orderField" : "original_legalname", + "queueMaxSize" : "100000", "groupMaxSize" : "50", "slidingWindowSize" : "200", "idPath":"$.id", @@ -15,10 +15,10 @@ }, "pace" : { "clustering" : [ - { "name" : "sortedngrampairs", "fields" : [ "legalname" ], "params" : { "max" : 2, "ngramLen" : "3"} }, - { "name" : "suffixprefix", "fields" : [ "legalname" ], "params" : { "max" : 1, "len" : "3" } }, + { "name" : "sortedngrampairs", "fields" : [ "original_legalname" ], "params" : { "max" : 2, "ngramLen" : "3"} }, + { "name" : "suffixprefix", "fields" : [ "original_legalname" ], "params" : { "max" : 1, "len" : "3" } }, { "name" : "urlclustering", "fields" : [ "websiteurl" ], "params" : { } }, - { "name" : "keywordsclustering", "fields" : [ "legalname" ], "params" : { "max": 2, "windowSize": 4} } + { "name" : "legalnameclustering", "fields" : [ "legalname" ], "params" : { "max": 2} } ], "decisionTree" : { "start": { @@ -29,16 +29,23 @@ "weight": 1, "countIfUndefined": "false", "params": {} + }, + { + "field": "rorid", + "comparator": "exactMatch", + "weight": 1, + "countIfUndefined": "false", + "params": {} } ], "threshold": 1, - "aggregation": "AVG", + "aggregation": "OR", "positive": "MATCH", "negative": "NO_MATCH", - "undefined": "layer2", + "undefined": "necessaryConditions", "ignoreUndefined": "false" }, - "layer2": { + "necessaryConditions": { "fields": [ { "field": "websiteurl", @@ -55,14 +62,14 @@ "params": {} }, { - "field": "legalname", + "field": "original_legalname", "comparator": "numbersMatch", "weight": 1, "countIfUndefined": "true", "params": {} }, { - "field": "legalname", + "field": "original_legalname", "comparator": "romansMatch", "weight": 1, "countIfUndefined": "true", @@ -71,68 +78,64 @@ ], "threshold": 1, "aggregation": "AND", - "positive": "layer3", + "positive": "cityCheck", "negative": "NO_MATCH", - "undefined": "layer3", + "undefined": "cityCheck", "ignoreUndefined": "true" }, - "layer3": { + "cityCheck": { "fields": [ { "field": "legalname", - "comparator": "cityMatch", + "comparator": "codeMatch", "weight": 1.0, "countIfUndefined": "true", "params": { - "windowSize": "4" + "codeRegex": "city::\\d+" } } ], "threshold": 0.1, "aggregation": "AVG", - "positive": "layer4", + "positive": "keywordCheck", "negative": "NO_MATCH", "undefined": "NO_MATCH", "ignoreUndefined": "true" }, - "layer4": { + "keywordCheck": { "fields": [ { "field": "legalname", - "comparator": "keywordMatch", + "comparator": "codeMatch", "weight": 1.0, "countIfUndefined": "true", "params": { - "windowSize": "4" + "codeRegex": "key::\\d+" } } ], "threshold": 0.7, "aggregation": "AVG", - "positive": "layer5", + "positive": "nameCheck", "negative": "NO_MATCH", - "undefined": "layer5", + "undefined": "nameCheck", "ignoreUndefined": "true" }, - "layer5": { + "nameCheck": { "fields": [ { "field": "legalname", - "comparator": "jaroWinklerNormalizedName", + "comparator": "jaroWinklerLegalname", "weight": 0.9, "countIfUndefined": "true", - "params": { - "windowSize": "4" - } + "params": {} }, { "field": "legalshortname", - "comparator": "jaroWinklerNormalizedName", + "comparator": "jaroWinklerLegalname", "weight": 0.1, "countIfUndefined": "false", - "params": { - "windowSize": 4 - } + "params": {} } ], "threshold": 0.9, @@ -144,126 +147,16 @@ } }, "model" : [ - { "name" : "country", "type" : "String", "path" : "$.country.classid"}, - { "name" : "legalshortname", "type" : "String", "path" : "$.legalshortname.value"}, - { "name" : "legalname", "type" : "String", "path" : "$.legalname.value" }, + { "name" : "country", "type" : "String", "path" : "$.country.classid", "infer" : "country", "inferenceFrom" : "$.legalname.value"}, + { "name" : "legalshortname", "type" : "String", "path" : "$.legalshortname.value", "infer" : "city_keyword"}, + { "name" : "original_legalname", "type" : "String", "path" : "$.legalname.value" }, + { "name" : "legalname", "type" : "String", "path" : "$.legalname.value", "infer" : "city_keyword"}, { "name" : "websiteurl", "type" : "URL", "path" : "$.websiteurl.value" }, { "name" : "gridid", "type" : "String", "path" : "$.pid[?(@.qualifier.classid =='grid')].value"}, + { "name" : "rorid", "type" : "String", "path" : "$.pid[?(@.qualifier.classid =='ROR')].value"}, { "name" : "originalId", "type" : "String", "path" : "$.id" } ], - "blacklists" : { - "legalname" : [] - }, - "synonyms": { - "key::1": ["university","università", "universitas", "università studi","universitario","universitaria","université", "universite", "universitaire","universitaires","universidad","universitade","Universität","universitaet","Uniwersytet","университет","universiteit","πανεπιστήμιο","universitesi","universiteti", "universiti"], - "key::2": ["studies","studi","études","estudios","estudos","Studien","studia","исследования","studies","σπουδές"], - "key::3": ["advanced","superiore","supérieur","supérieure","supérieurs","supérieures","avancado","avancados","fortgeschrittene","fortgeschritten","zaawansowany","передовой","gevorderd","gevorderde","προχωρημένος","προχωρημένη","προχωρημένο","προχωρημένες","προχωρημένα","wyzsza"], - "key::4": ["institute","istituto","institut","instituto","instituto","Institut","instytut","институт","instituut","ινστιτούτο"], - "key::5": ["hospital","ospedale","hôpital","hospital","hospital","Krankenhaus","szpital","больница","ziekenhuis","νοσοκομείο"], - "key::6": ["research","ricerca","recherche","investigacion","pesquisa","Forschung","badania","исследования","onderzoek","έρευνα","erevna","erevnas"], - "key::7": ["college","collegio","colegio","faculdade","Hochschule","Szkoła Wyższa","Высшая школа","κολλέγιο"], - "key::8": ["foundation","fondazione","fondation","fundación","fundação","Stiftung","Fundacja","фонд","stichting","ίδρυμα","idryma"], - "key::9": ["center","centro","centre","centro","centro","zentrum","centrum","центр","centrum","κέντρο"], - "key::10": ["national","nazionale","national","nationale","nationaux","nationales","nacional","nacional","national","krajowy","национальный","nationaal","nationale","εθνικό"], - "key::11": ["association","associazione","association","asociación","associação","Verein","verband","stowarzyszenie","ассоциация","associatie"], - "key::12": ["society","societa","société","sociedad","sociedade","gesellschaft","społeczeństwo","общество","maatschappij","κοινωνία"], - "key::13": ["international","internazionale","international","internacional","internacional","international","międzynarodowy","Международный","internationaal","internationale","διεθνής","διεθνή","διεθνές"], - "key::14": ["community","comunita","communauté","comunidad","comunidade","Gemeinschaft","społeczność","сообщество","gemeenschap","κοινότητα"], - "key::15": ["school","scuola","école","escuela","escola","schule","Szkoła","школа","school","σχολείο"], - "key::16": ["education","educazione","éducation","educacion","Educação","Bildung","Edukacja","образование","opleiding","εκπαίδευση"], - "key::17": ["academy","accademia","académie","academia","academia","Akademie","akademie","академия","academie","ακαδημία"], - "key::18": ["public","pubblico","public","publique","publics","publiques","publico","publico","Öffentlichkeit","publiczny","публичный","publiek","publieke","δημόσιος","δημόσια","δημόσιο"], - "key::19": ["museum","museo","musée","mueso","museu","museum","muzeum","музей","museum","μουσείο"], - "key::20": ["group","gruppo","groupe","grupo","grupo","gruppe","grupa","группа","groep","ομάδα","όμιλος"], - "key::21": ["department","dipartimento","département","departamento","departamento","abteilung","departament","отдел","afdeling","τμήμα"], - "key::22": ["council","consiglio","conseil","Consejo","conselho","gesellschaft","rada","совет","raad","συμβούλιο"], - "key::23": ["library","biblioteca","bibliothèque","biblioteca","biblioteca","Bibliothek","biblioteka","библиотека","bibliotheek","βιβλιοθήκη"], - "key::24": ["ministry","ministero","ministère","ministerio","ministério","Ministerium","ministerstwo","министерство","ministerie","υπουργείο"], - "key::25": ["services","servizi","services","servicios","Serviços","Dienstleistungen","usługi","услуги","diensten","υπηρεσίες"], - "key::26": ["central","centrale","central","centrale","centrales","central","central","zentral","centralny","цетральный","centraal","κεντρικός","κεντρική","κεντρικό","κεντρικά"], - "key::27": ["general","generale","général","générale","généraux","générales","general","geral","general","Allgemeines","general","общий","algemeen","algemene","γενικός","γενική","γενικό","γενικά"], - "key::28": ["applied","applicati","appliqué","appliquée","appliqués","appliquées","aplicado","aplicada","angewendet","stosowany","прикладной","toegepast","toegepaste","εφαρμοσμένος","εφαρμοσμένη","εφαρμοσμένο","εφαρμοσμένα"], - "key::29": ["european","europee","europea","européen","européenne","européens","européennes","europeo","europeu","europäisch","europejski","европейский","Europees","Europese","ευρωπαϊκός","ευρωπαϊκή","ευρωπαϊκό","ευρωπαϊκά"], - "key::30": ["agency","agenzia","agence","agencia","agencia","agentur","agencja","агенция","agentschap","πρακτορείο"], - "key::31": ["laboratory","laboratorio","laboratoire","laboratorio","laboratorio","labor","laboratorium","лаборатория","laboratorium","εργαστήριο"], - "key::32": ["industry","industria","industrie","индустрия","industrie","βιομηχανία"], - "key::33": ["industrial","industriale","industriel","industrielle","industriels","industrielles","индустриальный","industrieel","βιομηχανικός","βιομηχανική","βιομηχανικό","βιομηχανικά","βιομηχανικές"], - "key::34": ["consortium","consorzio","consortium","консорциум","consortium","κοινοπραξία"], - "key::35": ["organization","organizzazione","organisation","organización","organização","organizacja","организация","organisatie","οργανισμός"], - "key::36": ["authority","autorità","autorité","авторитет","autoriteit"], - "key::37": ["federation","federazione","fédération","федерация","federatie","ομοσπονδία"], - "key::38": ["observatory","osservatorio","observatoire","обсерватория","observatorium","αστεροσκοπείο"], - "key::39": ["bureau","ufficio","bureau","офис","bureau","γραφείο"], - "key::40": ["company","impresa","compagnie","société","компания","bedrijf","εταιρία"], - "key::41": ["polytechnic","politecnico","polytechnique","политехника","polytechnisch","πολυτεχνείο","universita politecnica","polytechnic university","universidad politecnica","universitat politecnica","politechnika","politechniki","university technology","university science technology"], - "key::42": ["coalition","coalizione","coalition","коалиция","coalitie","συνασπισμός"], - "key::43": ["initiative","iniziativa","initiative","инициатива","initiatief","πρωτοβουλία"], - "key::44": ["academic","accademico","académique","universitaire","акадеческий academisch","ακαδημαϊκός","ακαδημαϊκή","ακαδημαϊκό","ακαδημαϊκές","ακαδημαϊκοί"], - "key::45": ["institution","istituzione","institution","институциональный","instelling","ινστιτούτο"], - "key::46": ["division","divisione","division","отделение","divisie","τμήμα"], - "key::47": ["committee","comitato","comité","комитет","commissie","επιτροπή"], - "key::48": ["promotion","promozione","продвижение","proothisis","forderung"], - "key::49": ["medical","medicine","clinical","medicina","clinici","médico","medicina","clínica","médico","medicina","clínica","medizinisch","Medizin","klinisch","medisch","geneeskunde","klinisch","ιατρικός","ιατρική","ιατρικό","ιατρικά","κλινικός","κλινική","κλινικό","κλινικά","tıbbi","tıp","klinik","orvosi","orvostudomány","klinikai","zdravniški","medicinski","klinični","meditsiini","kliinik","kliiniline"], - "key::50": ["technology","technological","tecnologia","tecnologie","tecnología","tecnológico","tecnologia","tecnológico","Technologie","technologisch","technologie","technologisch","τεχνολογία","τεχνολογικός","τεχνολογική","τεχνολογικό","teknoloji","teknolojik","technológia","technológiai","tehnologija","tehnološki","tehnoloogia","tehnoloogiline","technologii","technical","texniki","teknik"], - "key::51": ["science","scientific","scienza","scientifiche","scienze","ciencia","científico","ciência","científico","Wissenschaft","wissenschaftlich","wetenschap","wetenschappelijk","επιστήμη","επιστημονικός","επιστημονική","επιστημονικό","επιστημονικά","bilim","bilimsel","tudomány","tudományos","znanost","znanstveni","teadus","teaduslik",""], - "key::52": ["engineering","ingegneria","ingeniería","engenharia","Ingenieurwissenschaft","ingenieurswetenschappen","bouwkunde","μηχανικός","μηχανική","μηχανικό","mühendislik","mérnöki","Inženirstvo","inseneeria","inseneri",""], - "key::53": ["management","gestione","gestionale","gestionali","gestión","administración","gestão","administração","Verwaltung","management","διαχείριση","yönetim","menedzsment","vodstvo","upravljanje","management","juhtkond","juhtimine","haldus",""], - "key::54": ["energy","energia","energía","energia","Energie","energie","ενέργεια","enerji","energia","energija","energia",""], - "key::55": ["agricultural","agriculture","agricoltura","agricole","agrícola","agricultura","agrícola","agricultura","landwirtschaftlich","Landwirtschaft","landbouwkundig","landbouw","αγροτικός","αγροτική","αγροτικό","γεωργικός","γεωργική","γεωργικό","γεωργία","tarımsal","tarım","mezőgazdasági","mezőgazdaság","poljedelski","poljedelstvo","põllumajandus","põllumajanduslik",""], - "key::56": ["information","informazione","información","informação","Information","informatie","πληροφορία","bilgi","információ","informacija","informatsioon","informatycznych",""], - "key::57": ["social","sociali","social","social","Sozial","sociaal","maatschappelijk","κοινωνικός","κοινωνική","κοινωνικό","κοινωνικά","sosyal","szociális","družbeni","sotsiaal","sotsiaalne",""], - "key::58": ["environmental","ambiente","medioambiental","ambiente","medioambiente","meioambiente","Umwelt","milieu","milieuwetenschap","milieukunde","περιβαλλοντικός","περιβαλλοντική","περιβαλλοντικό","περιβαλλοντικά","çevre","környezeti","okoliški","keskonna",""], - "key::59": ["business","economia","economiche","economica","negocio","empresa","negócio","Unternehmen","bedrijf","bedrijfskunde","επιχείρηση","iş","üzleti","posel","ettevõte/äri",""], - "key::60": ["pharmaceuticals","pharmacy","farmacia","farmaceutica","farmacéutica","farmacia","farmacêutica","farmácia","Pharmazeutika","Arzneimittelkunde","farmaceutica","geneesmiddelen","apotheek","φαρμακευτικός","φαρμακευτική","φαρμακευτικό","φαρμακευτικά","φαρμακείο","ilaç","eczane","gyógyszerészeti","gyógyszertár","farmacevtika","lekarništvo","farmaatsia","farmatseutiline",""], - "key::61": ["healthcare","health services","salute","atenciónmédica","cuidadodelasalud","cuidadoscomasaúde","Gesundheitswesen","gezondheidszorg","ιατροφαρμακευτικήπερίθαλψη","sağlıkhizmeti","egészségügy","zdravstvo","tervishoid","tervishoiu",""], - "key::62": ["history","storia","historia","história","Geschichte","geschiedenis","geschiedkunde","ιστορία","tarih","történelem","zgodovina","ajalugu",""], - "key::63": ["materials","materiali","materia","materiales","materiais","materialen","υλικά","τεκμήρια","malzemeler","anyagok","materiali","materjalid","vahendid",""], - "key::64": ["economics","economia","economiche","economica","economía","economia","Wirtschaft","economie","οικονομικά","οικονομικέςεπιστήμες","ekonomi","közgazdaságtan","gospodarstvo","ekonomija","majanduslik","majandus",""], - "key::65": ["therapeutics","terapeutica","terapéutica","terapêutica","therapie","θεραπευτική","tedavibilimi","gyógykezelés","terapevtika","terapeutiline","ravi",""], - "key::66": ["oncology","oncologia","oncologico","oncología","oncologia","Onkologie","oncologie","ογκολογία","onkoloji","onkológia","onkologija","onkoloogia",""], - "key::67": ["natural","naturali","naturale","natural","natural","natürlich","natuurlijk","φυσικός","φυσική","φυσικό","φυσικά","doğal","természetes","naraven","loodus",""], - "key::68": ["educational","educazione","pedagogia","educacional","educativo","educacional","pädagogisch","educatief","εκπαιδευτικός","εκπαιδευτική","εκπαιδευτικό","εκπαιδευτικά","eğitimsel","oktatási","izobraževalen","haridus","hariduslik",""], - "key::69": ["biomedical","biomedica","biomédico","biomédico","biomedizinisch","biomedisch","βιοιατρικός","βιοιατρική","βιοιατρικό","βιοιατρικά","biyomedikal","orvosbiológiai","biomedicinski","biomeditsiiniline",""], - "key::70": ["veterinary","veterinaria","veterinarie","veterinaria","veterinária","tierärtzlich","veterinair","veeartsenijlkunde","κτηνιατρικός","κτηνιατρική","κτηνιατρικό","κτηνιατρικά","veteriner","állatorvosi","veterinar","veterinarski","veterinaaria",""], - "key::71": ["chemistry","chimica","química","química","Chemie","chemie","scheikunde","χημεία","kimya","kémia","kemija","keemia",""], - "key::72": ["security","sicurezza","seguridad","segurança","Sicherheit","veiligheid","ασφάλεια","güvenlik","biztonsági","varnost","turvalisus","julgeolek",""], - "key::73": ["biotechnology","biotecnologia","biotecnologie","biotecnología","biotecnologia","Biotechnologie","biotechnologie","βιοτεχνολογία","biyoteknoloji","biotechnológia","biotehnologija","biotehnoloogia",""], - "key::74": ["military","militare","militari","militar","militar","Militär","militair","leger","στρατιωτικός","στρατιωτική","στρατιωτικό","στρατιωτικά","askeri","katonai","vojaški","vojni","militaar","wojskowa",""], - "key::75": ["theological","teologia","teologico","teológico","tecnológica","theologisch","theologisch","θεολογικός","θεολογική","θεολογικό","θεολογικά","teolojik","technológiai","teološki","teoloogia","usuteadus","teoloogiline",""], - "key::76": ["electronics","elettronica","electrónica","eletrônicos","Elektronik","elektronica","ηλεκτρονική","elektronik","elektronika","elektronika","elektroonika",""], - "key::77": ["forestry","forestale","forestali","silvicultura","forestal","floresta","Forstwirtschaft","bosbouw","δασοκομία","δασολογία","ormancılık","erdészet","gozdarstvo","metsandus",""], - "key::78": ["maritime","marittima","marittime","marittimo","marítimo","marítimo","maritiem","ναυτικός","ναυτική","ναυτικό","ναυτικά","ναυτιλιακός","ναυτιλιακή","ναυτιλιακό","ναυτιλιακά","θαλάσσιος","θαλάσσια","θαλάσσιο","denizcilik","tengeri","morski","mere","merendus",""], - "key::79": ["sports","sport","deportes","esportes","Sport","sport","sportwetenschappen","άθληση","γυμναστικήδραστηριότητα","spor","sport","šport","sport","spordi",""], - "key::80": ["surgery","chirurgia","chirurgiche","cirugía","cirurgia","Chirurgie","chirurgie","heelkunde","εγχείρηση","επέμβαση","χειρουργικήεπέμβαση","cerrahi","sebészet","kirurgija","kirurgia",""], - "key::81": ["cultural","culturale","culturali","cultura","cultural","cultural","kulturell","cultureel","πολιτιστικός","πολιτιστική","πολιτιστικό","πολιτισμικός","πολιτισμική","πολιτισμικό","kültürel","kultúrális","kulturni","kultuuri","kultuuriline",""], - "key::82": ["computerscience","informatica","ordenador","computadora","informática","computación","cienciasdelacomputación","ciênciadacomputação","Computer","computer","υπολογιστής","ηλεκτρονικόςυπολογιστής","bilgisayar","számítógép","računalnik","arvuti",""], - "key::83": ["finance","financial","finanza","finanziarie","finanza","financiero","finanças","financeiro","Finanzen","finanziell","financiën","financieel","χρηματοοικονομικά","χρηματοδότηση","finanse","finansal","pénzügy","pénzügyi","finance","finančni","finants","finantsiline",""], - "key::84": ["communication","comunicazione","comuniciación","comunicação","Kommunikation","communication","επικοινωνία","iletişim","kommunikáció","komuniciranje","kommunikatsioon",""], - "key::85": ["justice","giustizia","justicia","justiça","Recht","Justiz","justitie","gerechtigheid","δικαιοσύνη","υπουργείοδικαιοσύνης","δίκαιο","adalet","igazságügy","pravo","õigus",""], - "key::86": ["aerospace","aerospaziale","aerospaziali","aeroespacio","aeroespaço","Luftfahrt","luchtvaart","ruimtevaart","αεροπορικός","αεροπορική","αεροπορικό","αεροναυπηγικός","αεροναυπηγική","αεροναυπηγικό","αεροναυπηγικά","havacılıkveuzay","légtér","zrakoplovstvo","atmosfäär","kosmos",""], - "key::87": ["dermatology","dermatologia","dermatología","dermatologia","Dermatologie","dermatologie","δρματολογία","dermatoloji","bőrgyógyászat","dermatológia","dermatologija","dermatoloogia",""], - "key::88": ["architecture","architettura","arquitectura","arquitetura","Architektur","architectuur","αρχιτεκτονική","mimarlık","építészet","arhitektura","arhitektuur",""], - "key::89": ["mathematics","matematica","matematiche","matemáticas","matemáticas","Mathematik","wiskunde","mathematica","μαθηματικά","matematik","matematika","matematika","matemaatika",""], - "key::90": ["language","lingue","linguistica","linguistiche","lenguaje","idioma","língua","idioma","Sprache","taal","taalkunde","γλώσσα","dil","nyelv","jezik","keel",""], - "key::91": ["neuroscience","neuroscienza","neurociencia","neurociência","Neurowissenschaft","neurowetenschappen","νευροεπιστήμη","nörobilim","idegtudomány","nevroznanost","neuroteadused",""], - "key::92": ["automation","automazione","automatización","automação","Automatisierung","automatisering","αυτοματοποίηση","otomasyon","automatizálás","avtomatizacija","automatiseeritud",""], - "key::93": ["pediatric","pediatria","pediatriche","pediatrico","pediátrico","pediatría","pediátrico","pediatria","pädiatrisch","pediatrische","παιδιατρική","pediatrik","gyermekgyógyászat","pediatrija","pediaatria",""], - "key::94": ["photonics","fotonica","fotoniche","fotónica","fotônica","Photonik","fotonica","φωτονική","fotonik","fotonika","fotonika","fotoonika",""], - "key::95": ["mechanics", "mechanical", "meccanica","meccaniche","mecánica","mecânica","Mechanik","Maschinenbau","mechanica","werktuigkunde","μηχανικής","mekanik","gépészet","mehanika","mehaanika",""], - "key::96": ["psychiatrics","psichiatria","psichiatrica","psichiatriche","psiquiatría","psiquiatria","Psychiatrie","psychiatrie","ψυχιατρική","psikiyatrik","pszihiátria","psihiatrija","psühhaatria",""], - "key::97": ["psychology","fisiologia","psicología","psicologia","Psychologie","psychologie","ψυχολογία","psikoloji","pszihológia","psihologija","psühholoogia",""], - "key::98": ["automotive","industriaautomobilistica","industriadelautomóvil","automotriz","industriaautomotriz","automotivo","Automobilindustrie","autoindustrie","αυτοκίνητος","αυτοκίνητη","αυτοκίνητο","αυτοκινούμενος","αυτοκινούμενη","αυτοκινούμενο","αυτοκινητιστικός","αυτοκινητιστική","αυτοκινητιστικό","otomotiv","autóipari","samogiben","avtomobilskaindustrija","auto-",""], - "key::99": ["neurology","neurologia","neurologiche","neurología","neurologia","Neurologie","neurologie","zenuwleer","νευρολογία","nöroloji","neurológia","ideggyógyászat","nevrologija","neuroloogia",""], - "key::100": ["geology","geologia","geologiche","geología","geologia","Geologie","geologie","aardkunde","γεωλογία","jeoloji","geológia","földtudomány","geologija","geoloogia",""], - "key::101": ["microbiology","microbiologia","micro-biologia","microbiologiche","microbiología","microbiologia","Mikrobiologie","microbiologie","μικροβιολογία","mikrobiyoloji","mikrobiológia","mikrobiologija","mikrobioloogia",""], - "key::102": ["informatics","informatica","informática","informática","informatica",""], - "key::103": ["forschungsgemeinschaft","comunita ricerca","research community","research foundation","research association"], - "key::104": ["commerce","ticaret","ticarət","commercio","trade","handel","comercio"], - "key::105" : ["state", "stato", "etade", "estado", "statale", "etat", "zustand", "estado"], - "key::106" : ["seminary", "seminario", "seminaire", "seminar"], - "key::107" : ["agricultural forestry", "af", "a f"], - "key::108" : ["agricultural mechanical", "am", "a m"], - "key::109" : ["catholic", "catholique", "katholische", "catolica", "cattolica", "catolico"] - } + "blacklists" : {}, + "synonyms": {} } } \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/dataset_merge.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/dataset_merge.json index 74e55146f..86d1a8133 100644 --- a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/dataset_merge.json +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/dedup/json/dataset_merge.json @@ -1,2 +1,2 @@ {"publisher": {"value": "DANS Data Station Archaeology"}, "dateofcollection": "2024-04-30T12:49:55+0000", "dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": true}, "bestaccessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "language": {"classid": "und", "classname": "Undetermined", "schemeid": "dnet:languages", "schemename": "dnet:languages"}, "author": [{"affiliation": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "(Geonius)"}], "fullname": "S.A.H. Augustin", "pid": [], "rank": 1}, {"affiliation": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "(Geonius)"}], "fullname": "J.J.G. Geraeds", "pid": [], "rank": 2}], "resourcetype": {"classid": "UNKNOWN", "classname": "Unknown", "schemeid": "dnet:dataCite_resource", "schemename": "dnet:dataCite_resource"}, "originalId": ["50|datacite____::3d18564ef27ebe9ef3bd8b4dec67e148", "10.17026/ar/vt9ya1"], "description": [{"value": "In opdracht van Gemeente Beesel heeft Geonius Archeologie in maart 2023 een Inventariserend Veldonderzoek door middel van Proefsleuven (IVO-P) uitgevoerd voor het plangebied Heijackerstraat te Beesel in de gemeente Beesel. Aanleiding voor het uitvoeren van het archeologisch onderzoek vormt de aanvraag van een omgevingsvergunning voor bouw van 20 nieuwe woningen. Uit het vooronderzoek is gebleken dat het plangebied in een dalvakteterras ligt rondom opgestoven landduinen langsheen de Maas. De bodem bestaat volgens de bodemkaart uit vorstvaaggronden. Het plangebied is in het verleden voor zover kon worden vastgesteld in gebruik geweest als bouwland en is niet bebouwd geweest. Het IVO-O heeft uitgewezen dat de bodemopbouw deels intact is, al lijken in sommige boringen sporen van vergravingen of verstoringen te bevatten. Op grond van de resultaten van het vooronderzoek is een hoge verwachting opgesteld op het voorkomen van archeologische waarden uit het paleolithicum tot aan de vroege middeleeuwen. Voor de periode late middeleeuwen en nieuwe tijd is een lage verwachting opgesteld. Op grond van de resultaten van het vooronderzoek is een IVO-P uitgevoerd. Hierbij is een vindplaats aangetroffen bestaande uit drie subrecente greppels en een tweetal recente verstoringen. De vindplaats is als niet behoudenswaardig gewaardeerd. Aanbevolen is het plangebied vrij te geven voor de geplande ontwikkeling."}], "title": [{"qualifier": {"classid": "main title", "classname": "main title", "schemeid": "dnet:dataCite_title", "schemename": "dnet:dataCite_title"}, "value": "Archeologisch onderzoek IVO-P plangebied Heijackerstraat te Beesel"}], "pid": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17026/ar/vt9ya1"}], "id": "50|doi_________::3d18564ef27ebe9ef3bd8b4dec67e148", "instance": [{"refereed": {"classid": "0002", "classname": "nonPeerReviewed", "schemeid": "dnet:review_levels", "schemename": "dnet:review_levels"}, "hostedby": {"key": "10|re3data_____::84e123776089ce3c7a33db98d9cd15a8", "value": "EASY"}, "url": ["https://dx.doi.org/10.17026/ar/vt9ya1"], "pid": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:actionset", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17026/ar/vt9ya1"}], "instanceTypeMapping": [{"originalType": "Dataset", "typeLabel": "dataset", "vocabularyName": "openaire::coar_resource_types_3_1", "typeCode": "http://purl.org/coar/resource_type/c_ddb1"}], "dateofacceptance": {"value": "2024-01-01"}, "collectedfrom": {"key": "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254", "value": "Datacite"}, "accessright": {"classid": "UNKNOWN", "classname": "not available", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0021", "classname": "Dataset", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}], "relevantdate": [{"qualifier": {"classid": "issued", "classname": "issued", "schemeid": "dnet:dataCite_date", "schemename": "dnet:dataCite_date"}, "value": "2024-01-01"}], "resulttype": {"classid": "dataset", "classname": "dataset", "schemeid": "dnet:result_typologies", "schemename": "dnet:result_typologies"}, "context": [], "collectedfrom": [{"key": "10|openaire____::9e3be59865b2c1c335d32dae2fe7b254", "value": "Datacite"}], "dateoftransformation": "2024-04-30T12:49:55+0000", "subject": [], "dateofacceptance": {"value": "2024-01-01"}, "metaResourceType": {"classid": "Research Data", "classname": "Research Data", "schemeid": "openaire::meta_resource_types", "schemename": "openaire::meta_resource_types"}} -{"geolocation": [{"box": "", "place": "", "point": ""}], "dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": true}, "resourcetype": {"classid": "dataset", "classname": "dataset", "schemeid": "dnet:dataCite_resource", "schemename": "dnet:dataCite_resource"}, "pid": [], "contributor": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "Geonius"}], "oaiprovenance": {"originDescription": {"metadataNamespace": "", "harvestDate": "2024-05-05T04:33:31Z", "baseURL": "https://easy.dans.knaw.nl/oai", "datestamp": "", "altered": true, "identifier": "oai:easy.dans.knaw.nl:easy-dataset:341200"}}, "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "relevantdate": [], "collectedfrom": [{"key": "10|re3data_____::730f562f9efe8a3b3742d2da510d4335", "value": "B2FIND"}], "id": "50|r3730f562f9e::ace629fb505b6b4343faca03edde1841", "subject": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "Ancient Cultures"}, {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "Humanities"}, {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "Archaeology"}], "lastupdatetimestamp": 1716803651625, "author": [{"surname": "Augustin", "name": "S. A. H.", "pid": [], "rank": 1, "affiliation": [], "fullname": "S.A.H. Augustin"}, {"surname": "Geraeds", "name": "J. J. G.", "pid": [], "rank": 2, "affiliation": [], "fullname": "J.J.G. Geraeds"}], "instance": [{"refereed": {"classid": "0002", "classname": "nonPeerReviewed", "schemeid": "dnet:review_levels", "schemename": "dnet:review_levels"}, "hostedby": {"key": "10|re3data_____::84e123776089ce3c7a33db98d9cd15a8", "value": "DANS-EASY"}, "url": ["http://dx.doi.org/https://doi.org/10.17026/AR/VT9YA1"], "pid": [], "instanceTypeMapping": [{"originalType": "Dataset", "typeLabel": "dataset", "vocabularyName": "openaire::coar_resource_types_3_1", "typeCode": "http://purl.org/coar/resource_type/c_ddb1"}], "alternateIdentifier": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17026/ar/vt9ya1"}, {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17026/ar/vt9ya1"}, {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17026/ar/vt9ya1"}], "dateofacceptance": {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "2024-01-01"}, "collectedfrom": {"key": "10|re3data_____::730f562f9efe8a3b3742d2da510d4335", "value": "B2FIND"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0021", "classname": "Dataset", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}], "dateofcollection": "2024-05-10T00:02:09+0000", "metaResourceType": {"classid": "Research Data", "classname": "Research Data", "schemeid": "openaire::meta_resource_types", "schemename": "openaire::meta_resource_types"}, "dateoftransformation": "2024-05-26T00:23:54.028Z", "description": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "In opdracht van Gemeente Beesel heeft Geonius Archeologie in maart 2023 een Inventariserend Veldonderzoek door middel van Proefsleuven (IVO-P) uitgevoerd voor het plangebied Heijackerstraat te Beesel in de gemeente Beesel. Aanleiding voor het uitvoeren van het archeologisch onderzoek vormt de aanvraag van een omgevingsvergunning voor bouw van 20 nieuwe woningen. Uit het vooronderzoek is gebleken dat het plangebied in een dalvakteterras ligt rondom opgestoven landduinen langsheen de Maas. De bodem bestaat volgens de bodemkaart uit vorstvaaggronden. Het plangebied is in het verleden voor zover kon worden vastgesteld in gebruik geweest als bouwland en is niet bebouwd geweest. Het IVO-O heeft uitgewezen dat de bodemopbouw deels intact is, al lijken in sommige boringen sporen van vergravingen of verstoringen te bevatten. Op grond van de resultaten van het vooronderzoek is een hoge verwachting opgesteld op het voorkomen van archeologische waarden uit het paleolithicum tot aan de vroege middeleeuwen. Voor de periode late middeleeuwen en nieuwe tijd is een lage verwachting opgesteld. Op grond van de resultaten van het vooronderzoek is een IVO-P uitgevoerd. Hierbij is een vindplaats aangetroffen bestaande uit drie subrecente greppels en een tweetal recente verstoringen. De vindplaats is als niet behoudenswaardig gewaardeerd. Aanbevolen is het plangebied vrij te geven voor de geplande ontwikkeling."}], "format": [], "coverage": [], "externalReference": [], "publisher": {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "Data Archiving and Networked Services (DANS)"}, "context": [], "eoscifguidelines": [], "language": {"classid": "und", "classname": "Undetermined", "schemeid": "dnet:languages", "schemename": "dnet:languages"}, "resulttype": {"classid": "dataset", "classname": "dataset", "schemeid": "dnet:result_typologies", "schemename": "dnet:result_typologies"}, "country": [], "extraInfo": [], "originalId": ["oai:easy.dans.knaw.nl:easy-dataset:341200", "50|r3730f562f9e::ace629fb505b6b4343faca03edde1841"], "source": [], "dateofacceptance": {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "2024-01-01"}, "title": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "main title", "classname": "main title", "schemeid": "dnet:dataCite_title", "schemename": "dnet:dataCite_title"}, "value": "Archeologisch onderzoek IVO-P plangebied Heijackerstraat te Beesel"}]} +{"geolocation": [{"box": "", "place": "", "point": ""}], "dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": true}, "resourcetype": {"classid": "dataset", "classname": "dataset", "schemeid": "dnet:dataCite_resource", "schemename": "dnet:dataCite_resource"}, "pid": [], "contributor": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "Geonius"}], "oaiprovenance": {"originDescription": {"metadataNamespace": "", "harvestDate": "2024-05-05T04:33:31Z", "baseURL": "https://easy.dans.knaw.nl/oai", "datestamp": "", "altered": true, "identifier": "oai:easy.dans.knaw.nl:easy-dataset:341200"}}, "bestaccessright": {"classid": "OPEN", "classname": "Open Access", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "relevantdate": [], "collectedfrom": [{"key": "10|re3data_____::730f562f9efe8a3b3742d2da510d4335", "value": "B2FIND"}], "id": "50|r3730f562f9e::ace629fb505b6b4343faca03edde1841", "subject": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "Ancient Cultures"}, {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "Humanities"}, {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "keyword", "classname": "keyword", "schemeid": "dnet:subject_classification_typologies", "schemename": "dnet:subject_classification_typologies"}, "value": "Archaeology"}], "lastupdatetimestamp": 1716803651625, "author": [{"surname": "Augustin", "name": "S. A. H.", "pid": [], "rank": 1, "affiliation": [], "fullname": "S.A.H. Augustin"}, {"surname": "Geraeds", "name": "J. J. G.", "pid": [], "rank": 2, "affiliation": [], "fullname": "J.J.G. Geraeds"}], "instance": [{"refereed": {"classid": "0002", "classname": "nonPeerReviewed", "schemeid": "dnet:review_levels", "schemename": "dnet:review_levels"}, "hostedby": {"key": "10|re3data_____::84e123776089ce3c7a33db98d9cd15a8", "value": "DANS-EASY"}, "url": ["http://dx.doi.org/https://doi.org/10.17026/AR/VT9YA1"], "pid": [], "instanceTypeMapping": [{"originalType": "Dataset", "typeLabel": "dataset", "vocabularyName": "openaire::coar_resource_types_3_1", "typeCode": "http://purl.org/coar/resource_type/c_ddb1"}], "alternateIdentifier": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17026/ar/vt9ya1"}, {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17026/ar/vt9ya1"}, {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "doi", "classname": "Digital Object Identifier", "schemeid": "dnet:pid_types", "schemename": "dnet:pid_types"}, "value": "10.17026/ar/vt9ya1"}], "dateofacceptance": {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "2024-01-01"}, "collectedfrom": {"key": "10|re3data_____::730f562f9efe8a3b3742d2da510d4335", "value": "B2FIND"}, "accessright": {"classid": "OPEN", "classname": "Open Access", "schemeid": "dnet:access_modes", "schemename": "dnet:access_modes"}, "instancetype": {"classid": "0021", "classname": "Dataset", "schemeid": "dnet:publication_resource", "schemename": "dnet:publication_resource"}}], "dateofcollection": "2024-05-10T00:02:09+0000", "metaResourceType": {"classid": "Research Data", "classname": "Research Data", "schemeid": "openaire::meta_resource_types", "schemename": "openaire::meta_resource_types"}, "dateoftransformation": "2024-05-26T00:23:54.028Z", "description": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "In opdracht van Gemeente Beesel heeft Geonius Archeologie in maart 2023 een Inventariserend Veldonderzoek door middel van Proefsleuven (IVO-P) uitgevoerd voor het plangebied Heijackerstraat te Beesel in de gemeente Beesel. Aanleiding voor het uitvoeren van het archeologisch onderzoek vormt de aanvraag van een omgevingsvergunning voor bouw van 20 nieuwe woningen. Uit het vooronderzoek is gebleken dat het plangebied in een dalvakteterras ligt rondom opgestoven landduinen langsheen de Maas. De bodem bestaat volgens de bodemkaart uit vorstvaaggronden. Het plangebied is in het verleden voor zover kon worden vastgesteld in gebruik geweest als bouwland en is niet bebouwd geweest. Het IVO-O heeft uitgewezen dat de bodemopbouw deels intact is, al lijken in sommige boringen sporen van vergravingen of verstoringen te bevatten. Op grond van de resultaten van het vooronderzoek is een hoge verwachting opgesteld op het voorkomen van archeologische waarden uit het paleolithicum tot aan de vroege middeleeuwen. Voor de periode late middeleeuwen en nieuwe tijd is een lage verwachting opgesteld. Op grond van de resultaten van het vooronderzoek is een IVO-P uitgevoerd. Hierbij is een vindplaats aangetroffen bestaande uit drie subrecente greppels en een tweetal recente verstoringen. De vindplaats is als niet behoudenswaardig gewaardeerd. Aanbevolen is het plangebied vrij te geven voor de geplande ontwikkeling."}], "format": [], "coverage": [], "externalReference": [], "publisher": {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "Data Archiving and Networked Services (DANS)"}, "context": [], "eoscifguidelines": [], "language": {"classid": "und", "classname": "Undetermined", "schemeid": "dnet:languages", "schemename": "dnet:languages"}, "resulttype": {"classid": "dataset", "classname": "dataset", "schemeid": "dnet:result_typologies", "schemename": "dnet:result_typologies"}, "country": [], "extraInfo": [], "originalId": ["oai:easy.dans.knaw.nl:easy-dataset:341200", "50|r3730f562f9e::ace629fb505b6b4343faca03edde1841"], "source": [], "dateofacceptance": {"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "value": "2024-01-01"}, "title": [{"dataInfo": {"invisible": false, "provenanceaction": {"classid": "sysimport:crosswalk:repository", "classname": "Harvested", "schemeid": "dnet:provenanceActions", "schemename": "dnet:provenanceActions"}, "trust": "0.9", "inferred": false, "deletedbyinference": false}, "qualifier": {"classid": "main title", "classname": "main title", "schemeid": "dnet:dataCite_title", "schemename": "dnet:dataCite_title"}, "value": "Archeologisch onderzoek IVO-P plangebied Heijackerstraat te Beesel"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/oa/dedup/jpath/organization_example1.json b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/oa/dedup/jpath/organization_example1.json new file mode 100644 index 000000000..52fb304a9 --- /dev/null +++ b/dhp-workflows/dhp-dedup-openaire/src/test/resources/eu/dnetlib/dhp/oa/dedup/jpath/organization_example1.json @@ -0,0 +1 @@ +{"collectedfrom":[{"key":"10|openaire____::0362fcdb3076765d9c0041ad331553e8","value":"OpenOrgs Database","dataInfo":null}],"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}},"lastupdatetimestamp":1617006357185,"id":"20|openorgsmesh::8536807f92ca146e6f0990b07d712406","originalId":["openorgsmesh::0000098375-0dcc2286c99400f0ac63d0db84a57759"],"pid":[{"value":"0000 0004 1757 1758","qualifier":{"classid":"ISNI","classname":"International Standard Name Identifier","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"263329","qualifier":{"classid":"OrgRef","classname":"OrgRef","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"501100005969","qualifier":{"classid":"FundRef","classname":"FundRef","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"grid.6292.f","qualifier":{"classid":"GRID","classname":"GRID","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"https://ror.org/01111rn36","qualifier":{"classid":"ROR","classname":"ROR","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},{"value":"Q131262","qualifier":{"classid":"Wikidata","classname":"Wikidata","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}}],"dateofcollection":"2021-03-17","dateoftransformation":"2021-03-17","extraInfo":[],"oaiprovenance":null,"legalshortname":{"value":"Università di Bologna","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"legalname":{"value":"Università di Bologna","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"alternativeNames":[],"websiteurl":{"value":"http://www.unibo.it/en/homepage","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"logourl":null,"eclegalbody":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"eclegalperson":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecnonprofit":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecresearchorganization":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"echighereducation":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecinternationalorganizationeurinterests":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecinternationalorganization":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecenterprise":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecsmevalidated":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"ecnutscode":{"value":"false","dataInfo":{"invisible":false,"inferred":false,"deletedbyinference":false,"trust":"0.880","inferenceprovenance":"","provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"sysimport:crosswalk:entityregistry","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"}}},"country":{"classid":"IT","classname":"Italy","schemeid":"dnet:countries","schemename":"dnet:countries"}} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java index 354741690..e9a45be40 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/SparkBulkTagJob.java @@ -33,10 +33,7 @@ import eu.dnetlib.dhp.application.ArgumentApplicationParser; import eu.dnetlib.dhp.bulktag.community.*; import eu.dnetlib.dhp.schema.common.ModelConstants; import eu.dnetlib.dhp.schema.common.ModelSupport; -import eu.dnetlib.dhp.schema.oaf.Context; -import eu.dnetlib.dhp.schema.oaf.Datasource; -import eu.dnetlib.dhp.schema.oaf.Project; -import eu.dnetlib.dhp.schema.oaf.Result; +import eu.dnetlib.dhp.schema.oaf.*; import eu.dnetlib.dhp.schema.oaf.utils.OafMapperUtils; import scala.Tuple2; @@ -114,27 +111,35 @@ public class SparkBulkTagJob { extendCommunityConfigurationForEOSC(spark, inputPath, cc); execBulkTag( spark, inputPath, outputPath, protoMap, cc); + execEntityTag( + spark, inputPath + "organization", outputPath + "organization", + Utils.getCommunityOrganization(baseURL), Organization.class, TaggingConstants.CLASS_ID_ORGANIZATION, + TaggingConstants.CLASS_NAME_BULKTAG_ORGANIZATION); + execEntityTag( + spark, inputPath + "project", outputPath + "project", Utils.getCommunityProjects(baseURL), + Project.class, TaggingConstants.CLASS_ID_PROJECT, TaggingConstants.CLASS_NAME_BULKTAG_PROJECT); execDatasourceTag(spark, inputPath, outputPath, Utils.getDatasourceCommunities(baseURL)); - execProjectTag(spark, inputPath, outputPath, Utils.getCommunityProjects(baseURL)); + }); } - private static void execProjectTag(SparkSession spark, String inputPath, String outputPath, - CommunityEntityMap communityProjects) { - Dataset projects = readPath(spark, inputPath + "project", Project.class); + private static void execEntityTag(SparkSession spark, String inputPath, String outputPath, + CommunityEntityMap communityEntity, Class entityClass, + String classID, String calssName) { + Dataset entity = readPath(spark, inputPath, entityClass); Dataset pc = spark .createDataset( - communityProjects + communityEntity .keySet() .stream() - .map(k -> EntityCommunities.newInstance(k, communityProjects.get(k))) + .map(k -> EntityCommunities.newInstance(k, communityEntity.get(k))) .collect(Collectors.toList()), Encoders.bean(EntityCommunities.class)); - projects - .joinWith(pc, projects.col("id").equalTo(pc.col("entityId")), "left") - .map((MapFunction, Project>) t2 -> { - Project ds = t2._1(); + entity + .joinWith(pc, entity.col("id").equalTo(pc.col("entityId")), "left") + .map((MapFunction, E>) t2 -> { + E ds = t2._1(); if (t2._2() != null) { List context = Optional .ofNullable(ds.getContext()) @@ -156,8 +161,8 @@ public class SparkBulkTagJob { false, TaggingConstants.BULKTAG_DATA_INFO_TYPE, true, false, OafMapperUtils .qualifier( - TaggingConstants.CLASS_ID_DATASOURCE, - TaggingConstants.CLASS_NAME_BULKTAG_DATASOURCE, + classID, + calssName, ModelConstants.DNET_PROVENANCE_ACTIONS, ModelConstants.DNET_PROVENANCE_ACTIONS), "1"))); @@ -166,17 +171,17 @@ public class SparkBulkTagJob { }); } return ds; - }, Encoders.bean(Project.class)) + }, Encoders.bean(entityClass)) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(outputPath + "project"); + .json(outputPath); - readPath(spark, outputPath + "project", Project.class) + readPath(spark, outputPath, entityClass) .write() .mode(SaveMode.Overwrite) .option("compression", "gzip") - .json(inputPath + "project"); + .json(inputPath); } private static void execDatasourceTag(SparkSession spark, String inputPath, String outputPath, diff --git a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java index aea21f8e5..6e6f12cfb 100644 --- a/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java +++ b/dhp-workflows/dhp-enrichment/src/main/java/eu/dnetlib/dhp/bulktag/community/TaggingConstants.java @@ -13,6 +13,9 @@ public class TaggingConstants { public static final String CLASS_ID_CZENODO = "community:zenodocommunity"; public static final String CLASS_ID_ADVANCED_CONSTRAINT = "community:advconstraint"; + public static final String CLASS_ID_PROJECT = "community:project"; + public static final String CLASS_ID_ORGANIZATION = "community:organization"; + public static final String ZENODO_COMMUNITY_INDICATOR = "zenodo.org/communities/"; public static final String CLASS_NAME_BULKTAG_SUBJECT = "Bulktagging for Community - Subject"; @@ -20,5 +23,8 @@ public class TaggingConstants { public static final String CLASS_NAME_BULKTAG_ZENODO = "Bulktagging for Community - Zenodo"; public static final String CLASS_NAME_BULKTAG_ADVANCED_CONSTRAINT = "Bulktagging for Community - Advanced Constraints"; + public static final String CLASS_NAME_BULKTAG_PROJECT = "Bulktagging for Community - Project"; + public static final String CLASS_NAME_BULKTAG_ORGANIZATION = "Bulktagging for Community - Organization"; + public static final String TAGGING_TRUST = "0.8"; } diff --git a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java index a5280a3b3..f49774574 100644 --- a/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java +++ b/dhp-workflows/dhp-enrichment/src/test/java/eu/dnetlib/dhp/bulktag/BulkTagJobTest.java @@ -465,6 +465,138 @@ public class BulkTagJobTest { } + @Test + void organizationTag() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/") + .getPath(); + LocalFileSystem fs = FileSystem.getLocal(new Configuration()); + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/bulktag/pathMap/") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir.toString() + "/data/bulktagging/protoMap")); + SparkBulkTagJob + .main( + new String[] { + + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", sourcePath, + "-taggingConf", taggingConf, + + "-outputPath", workingDir.toString() + "/", + "-baseURL", "https://services.openaire.eu/openaire/community/", + + "-pathMap", workingDir.toString() + "/data/bulktagging/protoMap/pathMap", + "-nameNode", "local" + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/organization") + .map(item -> OBJECT_MAPPER.readValue(item, Organization.class)); + + Assertions.assertEquals(4, tmp.count()); + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Organization.class)); + + verificationDataset.createOrReplaceTempView("organization"); + + String query = "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name " + + "from organization " + + "lateral view explode(context) c as MyT " + + "lateral view explode(MyT.datainfo) d as MyD " + + "where MyD.inferenceprovenance = 'bulktagging'"; + + org.apache.spark.sql.Dataset idExplodeCommunity = spark.sql(query); + + idExplodeCommunity.show(false); + + Assertions.assertEquals(3, idExplodeCommunity.count()); + Assertions + .assertEquals( + 3, idExplodeCommunity.filter("provenance = 'community:organization'").count()); + Assertions + .assertEquals( + 3, + idExplodeCommunity + .filter("name = 'Bulktagging for Community - Organization'") + .count()); + + Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'netherlands'").count()); + Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'beopen'").count()); + Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'mes'").count()); + + } + + @Test + void projectTag() throws Exception { + final String sourcePath = getClass() + .getResource("/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/") + .getPath(); + LocalFileSystem fs = FileSystem.getLocal(new Configuration()); + fs + .copyFromLocalFile( + false, new org.apache.hadoop.fs.Path(getClass() + .getResource("/eu/dnetlib/dhp/bulktag/pathMap/") + .getPath()), + new org.apache.hadoop.fs.Path(workingDir.toString() + "/data/bulktagging/protoMap")); + SparkBulkTagJob + .main( + new String[] { + + "-isSparkSessionManaged", Boolean.FALSE.toString(), + "-sourcePath", sourcePath, + "-taggingConf", taggingConf, + + "-outputPath", workingDir.toString() + "/", + "-baseURL", "https://services.openaire.eu/openaire/community/", + + "-pathMap", workingDir.toString() + "/data/bulktagging/protoMap/pathMap", + "-nameNode", "local" + }); + + final JavaSparkContext sc = JavaSparkContext.fromSparkContext(spark.sparkContext()); + + JavaRDD tmp = sc + .textFile(workingDir.toString() + "/project") + .map(item -> OBJECT_MAPPER.readValue(item, Project.class)); + + Assertions.assertEquals(4, tmp.count()); + org.apache.spark.sql.Dataset verificationDataset = spark + .createDataset(tmp.rdd(), Encoders.bean(Project.class)); + + verificationDataset.createOrReplaceTempView("project"); + + String query = "select id, MyT.id community, MyD.provenanceaction.classid provenance, MyD.provenanceaction.classname name " + + "from project " + + "lateral view explode(context) c as MyT " + + "lateral view explode(MyT.datainfo) d as MyD " + + "where MyD.inferenceprovenance = 'bulktagging'"; + + org.apache.spark.sql.Dataset idExplodeCommunity = spark.sql(query); + + idExplodeCommunity.show(false); + + Assertions.assertEquals(4, idExplodeCommunity.count()); + Assertions + .assertEquals( + 4, idExplodeCommunity.filter("provenance = 'community:project'").count()); + Assertions + .assertEquals( + 4, + idExplodeCommunity + .filter("name = 'Bulktagging for Community - Project'") + .count()); + + Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'enermaps'").count()); + Assertions.assertEquals(1, idExplodeCommunity.filter("community = 'clarin'").count()); + Assertions.assertEquals(2, idExplodeCommunity.filter("community = 'dh-ch'").count()); + + } + @Test void bulktagByZenodoCommunityTest() throws Exception { final String sourcePath = getClass() diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/organization b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/organization new file mode 100644 index 000000000..cf7a2f90e --- /dev/null +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/organization @@ -0,0 +1,4 @@ +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche"}],"country":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"dateofcollection":"2021-02-15","dateoftransformation":"2024-03-29","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"extraInfo":[],"id":"20|openorgs____::2b9422d5f5bac4aae99b24e61586d3d3","lastupdatetimestamp":1719997513126,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"Laboratoire de Physique Statistique, UMR 8550 - Ecole Normale Supérieure"},"organizationType":"Unknown","originalId":["anr_________::9fdd505a0c45c6ad239c7c1406145820"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"qualifier":{"classid":"RNSR","classname":"RNSR","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"RNSR:199812878L"}]} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche"}],"country":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"dateofcollection":"2021-02-15","dateoftransformation":"2023-12-04","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"extraInfo":[],"id":"20|openorgs____::00010e003d3ce9c05e038cadfb6d4f03","lastupdatetimestamp":1719997513126,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"National Institute for Agricultural and Food Research and Technology"},"organizationType":"Unknown","originalId":["anr_________::16ba9bdd7a02f3e5f38a9c6a0ca24be6"],"pid":[]} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::457528c43fabd74e212db2ed61101075","value":"Agence Nationale de la Recherche"}],"country":{"classid":"UNKNOWN","classname":"Unknown","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"dateofcollection":"2021-02-15","dateoftransformation":"2022-10-11","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"false"},"extraInfo":[],"id":"20|openorgs____::ad863df6deda1619a25e7fad4a534891","lastupdatetimestamp":1719997513126,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.810"},"value":"INSTITUT DE VEILLE SANITAIRE"},"organizationType":"Unknown","originalId":["anr_________::27a60b3e005f647d173309729e4c62ae"],"pid":[]} +{"alternativeNames":[],"collectedfrom":[{"key":"10|openaire____::b30dac7baac631f3da7c2bb18dd9891f","value":"CORDA - COmmon Research DAta Warehouse"}],"country":{"classid":"DE","classname":"Germany","schemeid":"dnet:countries","schemename":"dnet:countries"},"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"dateofcollection":"2018-03-12","dateoftransformation":"2022-05-24","ecenterprise":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"echighereducation":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"ecinternationalorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"ecinternationalorganizationeurinterests":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"eclegalbody":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"eclegalperson":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"true"},"ecnonprofit":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"ecnutscode":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"ecresearchorganization":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"ecsmevalidated":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"false"},"extraInfo":[],"id":"20|corda_______::0dfa837c57e43e0528db09543a8c4dac","lastupdatetimestamp":1719997513126,"legalname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"ALLGEMEINER DEUTSCHER AUTOMOBIL CLUB E.V."},"legalshortname":{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"value":"ADAC"},"organizationType":"Unknown","originalId":["corda_______::999607990"],"pid":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"","inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.800"},"qualifier":{"classid":"PIC","classname":"Participant Identification Code","schemeid":"dnet:pid_types","schemename":"dnet:pid_types"},"value":"999607990"}]} \ No newline at end of file diff --git a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/project b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/project index e69de29bb..2b9628ea1 100644 --- a/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/project +++ b/dhp-workflows/dhp-enrichment/src/test/resources/eu/dnetlib/dhp/bulktag/sample/publication/update_datasource/project @@ -0,0 +1,4 @@ +{"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"unidentified"},"collectedfrom":[{"key":"10|openaire____::718b1e0b0206401b66e8d87f68876121","value":"Stroke Association"}],"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2024-04-20","dateoftransformation":"2024-04-20","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"extraInfo":[],"fundedamount":0.0,"fundingtree":[],"id":"40|ukri________::6937e06c766521c0629d8a6ca20ca345","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1719997513126,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"uk_funders_import"},"originalId":["501100000364::unidentified"],"pid":[],"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"unidentified"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Academy Project BY"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"276480"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2024-07-05T18:46:23.059","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2018-08-31"},"extraInfo":[],"fundedamount":471335.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|corda_______::ef782b2d85676aa3e5a907427feb18c4","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1719997513126,"measures":[{"id":"numOfInfluentialResults","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"19"}]},{"id":"numOfPopularResults","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"19"}]},{"id":"totalImpulse","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"486"}]},{"id":"totalCitationCount","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"840"}]},{"id":"downloads","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:usage_counts","classname":"measure:usage_counts","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"count","value":"17"}]},{"id":"views","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:usage_counts","classname":"measure:usage_counts","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"count","value":"9"}]}],"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"471,335 €"},"originalId":["aka_________::276480"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2014-09-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Yeast hybrid vigour: elucidation of mechanisms governing competitive advantage and sub-genome compatibility after interspecific hybridization"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Subsidy for organising of international conferences KY"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"107956"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2019-01-25","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2005-12-31"},"extraInfo":[],"fundedamount":2000.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|aka_________::017ddc7873b184db49337b2472cdc83f","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1719997513126,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2,000 €"},"originalId":["aka_________::107956"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2005-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"NODALIDA 2005 15th Nordic Conference on Computational Linguistics, 19.-22.5.2005"},"totalcost":0.0} +{"callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Biofuture 2025, second stage LT"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"307624"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2024-07-05T18:46:23.059","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2020-12-31"},"extraInfo":[],"fundedamount":331277.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|snsf________::911db67dc5eebd24d7787d69d4974ca4","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1719997513126,"measures":[{"id":"numOfInfluentialResults","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"8"}]},{"id":"numOfPopularResults","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"8"}]},{"id":"totalImpulse","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"28"}]},{"id":"totalCitationCount","unit":[{"dataInfo":{"deletedbyinference":false,"inferenceprovenance":"update","inferred":true,"invisible":false,"provenanceaction":{"classid":"measure:bip","classname":"measure:bip","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":""},"key":"score","value":"30"}]}],"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"331,277 €"},"originalId":["aka_________::307624"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2017-01-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Overcoming technology barriers with tailored catalysts: Design of molecularly functionalized heterogeneous catalysts for selective reductions of biomass-derived materials / Consortium: FunCat"},"totalcost":0.0} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml index b74562284..ff927fe52 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/raw_all/oozie_app/workflow.xml @@ -368,6 +368,32 @@ --mdLayoutstore --mdInterpretationcleaned + + + + + + + yarn + cluster + ImportODF_hdfs_invisible + eu.dnetlib.dhp.oa.graph.raw.MigrateHdfsMdstoresApplication + dhp-graph-mapper-${projectVersion}.jar + + --executor-memory ${sparkExecutorMemory} + --executor-cores ${sparkExecutorCores} + --driver-memory=${sparkDriverMemory} + --conf spark.extraListeners=${spark2ExtraListeners} + --conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners} + --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} + --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} + + --hdfsPath${contentPath}/odf_mdstore_hdfs_invisible + --mdstoreManagerUrl${mdstoreManagerUrl} + --mdFormatODF + --mdLayoutstore + --mdInterpretationintersection + @@ -529,7 +555,7 @@ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --sourcePaths${contentPath}/db_openaire,${contentPath}/db_openorgs,${contentPath}/oaf_mdstore_hdfs,${contentPath}/odf_mdstore_hdfs,${contentPath}/mdstore/*/* + --sourcePaths${contentPath}/db_openaire,${contentPath}/db_openorgs,${contentPath}/oaf_mdstore_hdfs,${contentPath}/odf_mdstore_hdfs,${contentPath}/odf_mdstore_hdfs_invisible,${contentPath}/mdstore/*/* --invalidPath${workingDir}/invalid_records --isLookupUrl${isLookupUrl} @@ -553,7 +579,7 @@ --conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress} --conf spark.eventLog.dir=${nameNode}${spark2EventLogDir} - --sourcePaths${contentPath}/db_openaire,${contentPath}/db_openorgs,${contentPath}/oaf_mdstore_hdfs,${contentPath}/odf_mdstore_hdfs,${contentPath}/mdstore/*/* + --sourcePaths${contentPath}/db_openaire,${contentPath}/db_openorgs,${contentPath}/oaf_mdstore_hdfs,${contentPath}/odf_mdstore_hdfs,${contentPath}/odf_mdstore_hdfs_invisible,${contentPath}/mdstore/*/* --targetPath${workingDir}/entities --isLookupUrl${isLookupUrl} --shouldHashId${shouldHashId} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOpenOrgsForOrgsDedup.sql b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOpenOrgsForOrgsDedup.sql index d9e4b855d..933c26356 100644 --- a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOpenOrgsForOrgsDedup.sql +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/oa/graph/sql/queryOpenOrgsForOrgsDedup.sql @@ -25,12 +25,14 @@ SELECT null AS ecinternationalorganization, null AS ecenterprise, null AS ecsmevalidated, - null AS ecnutscode + null AS ecnutscode, + org_types.name AS typology FROM organizations o LEFT OUTER JOIN acronyms a ON (a.id = o.id) LEFT OUTER JOIN urls u ON (u.id = o.id) LEFT OUTER JOIN other_ids i ON (i.id = o.id) LEFT OUTER JOIN other_names n ON (n.id = o.id) + LEFT OUTER JOIN org_types ON (org_types.val = o.type) WHERE o.status = 'approved' GROUP BY @@ -38,7 +40,8 @@ GROUP BY o.name, o.creation_date, o.modification_date, - o.country + o.country, + org_types.name UNION ALL @@ -69,13 +72,15 @@ SELECT (array_remove(array_cat(ARRAY[o.ec_internationalorganization], array_agg(od.ec_internationalorganization)), NULL))[1] AS ecinternationalorganization, (array_remove(array_cat(ARRAY[o.ec_enterprise], array_agg(od.ec_enterprise)), NULL))[1] AS ecenterprise, (array_remove(array_cat(ARRAY[o.ec_smevalidated], array_agg(od.ec_smevalidated)), NULL))[1] AS ecsmevalidated, - (array_remove(array_cat(ARRAY[o.ec_nutscode], array_agg(od.ec_nutscode)), NULL))[1] AS ecnutscode + (array_remove(array_cat(ARRAY[o.ec_nutscode], array_agg(od.ec_nutscode)), NULL))[1] AS ecnutscode, + org_types.name AS typology FROM other_names n LEFT OUTER JOIN organizations o ON (n.id = o.id) LEFT OUTER JOIN urls u ON (u.id = o.id) LEFT OUTER JOIN other_ids i ON (i.id = o.id) LEFT OUTER JOIN oa_duplicates d ON (o.id = d.local_id) LEFT OUTER JOIN organizations od ON (d.oa_original_id = od.id) + LEFT OUTER JOIN org_types ON (org_types.val = o.type) WHERE o.status = 'approved' GROUP BY @@ -83,4 +88,5 @@ GROUP BY o.creation_date, o.modification_date, o.country, + org_types.name, n.name; \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/create_scholix_dump_params.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/create_scholix_dump_params.json new file mode 100644 index 000000000..53fe95895 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/create_scholix_dump_params.json @@ -0,0 +1,5 @@ +[ + {"paramName":"mt", "paramLongName":"master", "paramDescription": "should be local or yarn", "paramRequired": false}, + {"paramName":"s", "paramLongName":"sourcePath", "paramDescription": "the source Path", "paramRequired": true}, + {"paramName":"t", "paramLongName":"targetPath", "paramDescription": "the path of the scholix dump", "paramRequired": true} +] \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/relation/relations.json b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/relation/relations.json new file mode 100644 index 000000000..4f0cee53d --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/resources/eu/dnetlib/dhp/sx/relation/relations.json @@ -0,0 +1,166 @@ +{ + "cites":{ + "original":"Cites", + "inverse":"IsCitedBy" + }, + "compiles":{ + "original":"Compiles", + "inverse":"IsCompiledBy" + }, + "continues":{ + "original":"Continues", + "inverse":"IsContinuedBy" + }, + "derives":{ + "original":"IsSourceOf", + "inverse":"IsDerivedFrom" + }, + "describes":{ + "original":"Describes", + "inverse":"IsDescribedBy" + }, + "documents":{ + "original":"Documents", + "inverse":"IsDocumentedBy" + }, + "hasmetadata":{ + "original":"HasMetadata", + "inverse":"IsMetadataOf" + }, + "hasassociationwith":{ + "original":"HasAssociationWith", + "inverse":"HasAssociationWith" + }, + "haspart":{ + "original":"HasPart", + "inverse":"IsPartOf" + }, + "hasversion":{ + "original":"HasVersion", + "inverse":"IsVersionOf" + }, + "iscitedby":{ + "original":"IsCitedBy", + "inverse":"Cites" + }, + "iscompiledby":{ + "original":"IsCompiledBy", + "inverse":"Compiles" + }, + "iscontinuedby":{ + "original":"IsContinuedBy", + "inverse":"Continues" + }, + "isderivedfrom":{ + "original":"IsDerivedFrom", + "inverse":"IsSourceOf" + }, + "isdescribedby":{ + "original":"IsDescribedBy", + "inverse":"Describes" + }, + "isdocumentedby":{ + "original":"IsDocumentedBy", + "inverse":"Documents" + }, + "isidenticalto":{ + "original":"IsIdenticalTo", + "inverse":"IsIdenticalTo" + }, + "ismetadatafor":{ + "original":"IsMetadataFor", + "inverse":"IsMetadataOf" + }, + "ismetadataof":{ + "original":"IsMetadataOf", + "inverse":"IsMetadataFor" + }, + "isnewversionof":{ + "original":"IsNewVersionOf", + "inverse":"IsPreviousVersionOf" + }, + "isobsoletedby":{ + "original":"IsObsoletedBy", + "inverse":"Obsoletes" + }, + "isoriginalformof":{ + "original":"IsOriginalFormOf", + "inverse":"IsVariantFormOf" + }, + "ispartof":{ + "original":"IsPartOf", + "inverse":"HasPart" + }, + "ispreviousversionof":{ + "original":"IsPreviousVersionOf", + "inverse":"IsNewVersionOf" + }, + "isreferencedby":{ + "original":"IsReferencedBy", + "inverse":"References" + }, + "isrelatedto":{ + "original":"IsRelatedTo", + "inverse":"IsRelatedTo" + }, + "isrequiredby":{ + "original":"IsRequiredBy", + "inverse":"Requires" + }, + "isreviewedby":{ + "original":"IsReviewedBy", + "inverse":"Reviews" + }, + "issourceof":{ + "original":"IsSourceOf", + "inverse":"IsDerivedFrom" + }, + "issupplementedby":{ + "original":"IsSupplementedBy", + "inverse":"IsSupplementTo" + }, + "issupplementto":{ + "original":"IsSupplementTo", + "inverse":"IsSupplementedBy" + }, + "isvariantformof":{ + "original":"IsVariantFormOf", + "inverse":"IsOriginalFormOf" + }, + "isversionof":{ + "original":"IsVersionOf", + "inverse":"HasVersion" + }, + "obsoletes":{ + "original":"Obsoletes", + "inverse":"IsObsoletedBy" + }, + "references":{ + "original":"References", + "inverse":"IsReferencedBy" + }, + "requires":{ + "original":"Requires", + "inverse":"IsRequiredBy" + }, + "related":{ + "original":"IsRelatedTo", + "inverse":"IsRelatedTo" + }, + "reviews":{ + "original":"Reviews", + "inverse":"IsReviewedBy" + }, + "unknown":{ + "original":"Unknown", + "inverse":"Unknown" + }, + "isamongtopnsimilardocuments": { + "original": "IsAmongTopNSimilarDocuments", + "inverse": "HasAmongTopNSimilarDocuments" + }, + "hasamongtopnsimilardocuments": { + "original": "HasAmongTopNSimilarDocuments", + "inverse": "IsAmongTopNSimilarDocuments" + } +} \ No newline at end of file diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/ScholexplorerUtils.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/ScholexplorerUtils.scala new file mode 100644 index 000000000..d171d96d9 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/ScholexplorerUtils.scala @@ -0,0 +1,258 @@ +package eu.dnetlib.dhp.sx.graph + +import com.fasterxml.jackson.databind.ObjectMapper +import eu.dnetlib.dhp.schema.oaf.{KeyValue, Result, StructuredProperty} +import eu.dnetlib.dhp.schema.sx.scholix.{ + Scholix, + ScholixCollectedFrom, + ScholixEntityId, + ScholixIdentifier, + ScholixRelationship, + ScholixResource +} +import org.json4s +import org.json4s.DefaultFormats +import org.json4s.jackson.JsonMethods.parse + +import scala.collection.JavaConverters._ +import scala.io.Source + +case class RelationInfo( + source: String, + target: String, + relclass: String, + id: String, + collectedfrom: Seq[RelKeyValue] +) {} +case class RelKeyValue(key: String, value: String) {} + +object ScholexplorerUtils { + + val OPENAIRE_IDENTIFIER_SCHEMA: String = "OpenAIRE Identifier" + val mapper = new ObjectMapper() + + case class RelationVocabulary(original: String, inverse: String) {} + + val relations: Map[String, RelationVocabulary] = { + val input = Source + .fromInputStream( + getClass.getResourceAsStream("/eu/dnetlib/dhp/sx/relation/relations.json") + ) + .mkString + implicit lazy val formats: DefaultFormats.type = org.json4s.DefaultFormats + + lazy val json: json4s.JValue = parse(input) + + json.extract[Map[String, RelationVocabulary]] + } + + def invRel(rel: String): String = { + val semanticRelation = relations.getOrElse(rel.toLowerCase, null) + if (semanticRelation != null) + semanticRelation.inverse + else + null + } + + def generateDatasourceOpenAIREURLS(id: String): String = { + if (id != null && id.length > 12) + s"https://explore.openaire.eu/search/dataprovider?datasourceId=${id.substring(3)}" + else + null + } + + def findURLForPID( + pidValue: List[StructuredProperty], + urls: List[String] + ): List[(StructuredProperty, String)] = { + pidValue.map { p => + val pv = p.getValue + + val r = urls.find(u => u.toLowerCase.contains(pv.toLowerCase)) + (p, r.orNull) + } + } + + def extractTypedIdentifierFromInstance(r: Result): List[ScholixIdentifier] = { + if (r.getInstance() == null || r.getInstance().isEmpty) + return List() + r.getInstance() + .asScala + .filter(i => i.getUrl != null && !i.getUrl.isEmpty) + .filter(i => i.getPid != null && i.getUrl != null) + .flatMap(i => findURLForPID(i.getPid.asScala.toList, i.getUrl.asScala.toList)) + .map(i => new ScholixIdentifier(i._1.getValue, i._1.getQualifier.getClassid, i._2)) + .distinct + .toList + } + + def generateScholixResourceFromResult(result: Result): ScholixResource = { + + if (result.getInstance() == null || result.getInstance().size() == 0) + return null + + if (result.getPid == null || result.getPid.isEmpty) + return null + + val r = new ScholixResource + r.setDnetIdentifier(result.getId) + + val persistentIdentifiers: List[ScholixIdentifier] = extractTypedIdentifierFromInstance(result) + if (persistentIdentifiers.isEmpty) + return null + + r.setIdentifier(persistentIdentifiers.asJava) + + r.setObjectType(result.getResulttype.getClassid) + + r.setObjectSubType( + result + .getInstance() + .asScala + .filter(i => i != null && i.getInstancetype != null) + .map(i => i.getInstancetype.getClassname) + .distinct + .head + ) + + if (result.getTitle != null && result.getTitle.asScala.nonEmpty) { + val titles: List[String] = result.getTitle.asScala.map(t => t.getValue).toList + if (titles.nonEmpty) + r.setTitle(titles.head) + else + return null + } + if (result.getAuthor != null && !result.getAuthor.isEmpty) { + val authors: List[ScholixEntityId] = + result.getAuthor.asScala + .map(a => { + val entity = new ScholixEntityId() + entity.setName(a.getFullname) + if (a.getPid != null && a.getPid.size() > 0) + entity.setIdentifiers( + a.getPid.asScala + .map(sp => { + val id = new ScholixIdentifier() + id.setIdentifier(sp.getValue) + id.setSchema(sp.getQualifier.getClassid) + id + }) + .take(3) + .toList + .asJava + ) + entity + }) + .toList + if (authors.nonEmpty) + r.setCreator(authors.asJava) + + } + + val dt: List[String] = result + .getInstance() + .asScala + .filter(i => i.getDateofacceptance != null) + .map(i => i.getDateofacceptance.getValue) + .toList + if (dt.nonEmpty) + r.setPublicationDate(dt.distinct.head) + + r.setPublisher( + result + .getInstance() + .asScala + .map(i => i.getHostedby) + .filter(h => !"unknown".equalsIgnoreCase(h.getValue)) + .map(h => { + val eid = new ScholixEntityId() + eid.setName(h.getValue) + val id = new ScholixIdentifier() + id.setIdentifier(h.getKey) + id.setSchema(OPENAIRE_IDENTIFIER_SCHEMA) + id.setUrl(generateDatasourceOpenAIREURLS(h.getKey)) + eid.setIdentifiers(List(id).asJava) + eid + }) + .distinct + .asJava + ) + + r.setCollectedFrom( + result.getCollectedfrom.asScala + .map(cf => { + val scf = new ScholixCollectedFrom() + scf.setProvisionMode("collected") + scf.setCompletionStatus("complete") + val eid = new ScholixEntityId() + eid.setName(cf.getValue) + val id = new ScholixIdentifier() + id.setIdentifier(cf.getKey) + id.setSchema(OPENAIRE_IDENTIFIER_SCHEMA) + id.setUrl(generateDatasourceOpenAIREURLS(cf.getKey)) + eid.setIdentifiers(List(id).asJava) + scf.setProvider(eid) + scf + }) + .asJava + ) + + r + } + + def generateScholix(relation: RelationInfo, source: ScholixResource): Scholix = { + val s: Scholix = new Scholix + s.setSource(source) + if (relation.collectedfrom != null && relation.collectedfrom.nonEmpty) + s.setLinkprovider( + relation.collectedfrom + .map(cf => { + val eid = new ScholixEntityId() + eid.setName(cf.value) + val id = new ScholixIdentifier() + id.setIdentifier(cf.key) + id.setSchema(OPENAIRE_IDENTIFIER_SCHEMA) + id.setUrl(generateDatasourceOpenAIREURLS(cf.key)) + eid.setIdentifiers(List(id).asJava) + eid + }) + .toList + .asJava + ) + else { + val eid = new ScholixEntityId() + eid.setName("OpenAIRE") + val id = new ScholixIdentifier() + id.setIdentifier("10|infrastruct_::f66f1bd369679b5b077dcdf006089556") + id.setSchema(OPENAIRE_IDENTIFIER_SCHEMA) + id.setUrl(generateDatasourceOpenAIREURLS(id.getIdentifier)) + eid.setIdentifiers(List(id).asJava) + s.setLinkprovider(List(eid).asJava) + } + s.setIdentifier(relation.id) + val semanticRelation = relations.getOrElse(relation.relclass.toLowerCase, null) + if (semanticRelation == null) + return null + s.setRelationship( + new ScholixRelationship(semanticRelation.original, "datacite", semanticRelation.inverse) + ) + s.setPublicationDate(source.getPublicationDate) + s.setPublisher(source.getPublisher) + val mockTarget = new ScholixResource + mockTarget.setDnetIdentifier(relation.target) + s.setTarget(mockTarget) + s + } + + def updateTarget(s: Scholix, t: ScholixResource): String = { + + s.setTarget(t) + val spublishers: Seq[ScholixEntityId] = + if (s.getPublisher != null && !s.getPublisher.isEmpty) s.getPublisher.asScala else List() + val tpublishers: Seq[ScholixEntityId] = + if (t.getPublisher != null && !t.getPublisher.isEmpty) t.getPublisher.asScala else List() + val mergedPublishers = spublishers.union(tpublishers).distinct.take(10).toList + s.setPublisher(mergedPublishers.asJava) + mapper.writeValueAsString(s) + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholexplorerDump.scala b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholexplorerDump.scala new file mode 100644 index 000000000..dd420ab95 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/main/scala/eu/dnetlib/dhp/sx/graph/SparkCreateScholexplorerDump.scala @@ -0,0 +1,141 @@ +package eu.dnetlib.dhp.sx.graph + +import eu.dnetlib.dhp.application.AbstractScalaApplication +import eu.dnetlib.dhp.schema.oaf.{ + KeyValue, + OtherResearchProduct, + Publication, + Relation, + Result, + Software, + Dataset => OafDataset +} +import eu.dnetlib.dhp.schema.sx.scholix.{Scholix, ScholixResource} +import org.apache.spark.sql.functions.{col, concat, expr, first, md5} +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql._ +import org.slf4j.{Logger, LoggerFactory} + +class SparkCreateScholexplorerDump(propertyPath: String, args: Array[String], log: Logger) + extends AbstractScalaApplication(propertyPath, args, log: Logger) { + + /** Here all the spark applications runs this method + * where the whole logic of the spark node is defined + */ + override def run(): Unit = { + val sourcePath = parser.get("sourcePath") + log.info("sourcePath: {}", sourcePath) + val targetPath = parser.get("targetPath") + log.info("targetPath: {}", targetPath) + generateBidirectionalRelations(sourcePath, targetPath, spark) + generateScholixResource(sourcePath, targetPath, spark) + generateScholix(targetPath, spark) + } + + def generateScholixResource(inputPath: String, outputPath: String, spark: SparkSession): Unit = { + val entityMap: Map[String, StructType] = Map( + "publication" -> Encoders.bean(classOf[Publication]).schema, + "dataset" -> Encoders.bean(classOf[OafDataset]).schema, + "software" -> Encoders.bean(classOf[Software]).schema, + "otherresearchproduct" -> Encoders.bean(classOf[OtherResearchProduct]).schema + ) + + implicit val scholixResourceEncoder: Encoder[ScholixResource] = Encoders.bean(classOf[ScholixResource]) + implicit val resultEncoder: Encoder[Result] = Encoders.bean(classOf[Result]) + + val resDs = spark.emptyDataset[ScholixResource] + val scholixResourceDS = entityMap.foldLeft[Dataset[ScholixResource]](resDs)((res, item) => { + println(s"adding ${item._1}") + res.union( + spark.read + .schema(item._2) + .json(s"$inputPath/${item._1}") + .as[Result] + .map(r => ScholexplorerUtils.generateScholixResourceFromResult(r)) + .filter(s => s != null) + ) + }) + scholixResourceDS.write.mode(SaveMode.Overwrite).save(s"$outputPath/resource") + } + + def generateBidirectionalRelations(inputPath: String, otuputPath: String, spark: SparkSession): Unit = { + val relSchema = Encoders.bean(classOf[Relation]).schema + + val relDF = spark.read + .schema(relSchema) + .json(s"$inputPath/relation") + .where( + "datainfo.deletedbyinference is false and source like '50%' and target like '50%' " + + "and relClass <> 'merges' and relClass <> 'isMergedIn'" + ) + .select("source", "target", "collectedfrom", "relClass") + + def invRel: String => String = { s => + ScholexplorerUtils.invRel(s) + } + + import org.apache.spark.sql.functions.udf + val inverseRelationUDF = udf(invRel) + val inverseRelation = relDF.select( + col("target").alias("source"), + col("source").alias("target"), + col("collectedfrom"), + inverseRelationUDF(col("relClass")).alias("relClass") + ) + + val bidRel = inverseRelation + .union(relDF) + .withColumn("id", md5(concat(col("source"), col("relClass"), col("target")))) + .withColumn("cf", expr("transform(collectedfrom, x -> struct(x.key, x.value))")) + .drop("collectedfrom") + .withColumnRenamed("cf", "collectedfrom") + .groupBy(col("id")) + .agg( + first("source").alias("source"), + first("target").alias("target"), + first("relClass").alias("relClass"), + first("collectedfrom").alias("collectedfrom") + ) + + bidRel.write.mode(SaveMode.Overwrite).save(s"$otuputPath/relation") + + } + + def generateScholix(outputPath: String, spark: SparkSession): Unit = { + implicit val scholixResourceEncoder: Encoder[ScholixResource] = Encoders.bean(classOf[ScholixResource]) + implicit val scholixEncoder: Encoder[Scholix] = Encoders.kryo(classOf[Scholix]) + + import spark.implicits._ + val relations = spark.read.load(s"$outputPath/relation").as[RelationInfo] + val resource = spark.read.load(s"$outputPath/resource").as[ScholixResource] + + val scholix_one_verse = relations + .joinWith(resource, relations("source") === resource("dnetIdentifier"), "inner") + .map(res => ScholexplorerUtils.generateScholix(res._1, res._2)) + .map(s => (s.getIdentifier, s))(Encoders.tuple(Encoders.STRING, Encoders.kryo(classOf[Scholix]))) + + val resourceTarget = relations + .joinWith(resource, relations("target") === resource("dnetIdentifier"), "inner") + .map(res => (res._1.id, res._2))(Encoders.tuple(Encoders.STRING, Encoders.kryo(classOf[ScholixResource]))) + + scholix_one_verse + .joinWith(resourceTarget, scholix_one_verse("_1") === resourceTarget("_1"), "inner") + .map(k => ScholexplorerUtils.updateTarget(k._1._2, k._2._2)) + .write + .mode(SaveMode.Overwrite) + .option("compression", "gzip") + .text(s"$outputPath/scholix") + } +} + +object SparkCreateScholexplorerDump { + val logger: Logger = LoggerFactory.getLogger(SparkCreateScholexplorerDump.getClass) + + def main(args: Array[String]): Unit = { + new SparkCreateScholexplorerDump( + log = logger, + args = args, + propertyPath = "/eu/dnetlib/dhp/sx/create_scholix_dump_params.json" + ).initialize().run() + } +} diff --git a/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGenerationTest.scala b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGenerationTest.scala new file mode 100644 index 000000000..204fe9794 --- /dev/null +++ b/dhp-workflows/dhp-graph-mapper/src/test/scala/eu/dnetlib/dhp/sx/graph/scholix/ScholixGenerationTest.scala @@ -0,0 +1,26 @@ +package eu.dnetlib.dhp.sx.graph.scholix + +import eu.dnetlib.dhp.schema.sx.scholix.ScholixResource +import eu.dnetlib.dhp.sx.graph.SparkCreateScholexplorerDump +import org.apache.spark.SparkConf +import org.apache.spark.sql.{Encoder, Encoders, SparkSession} +import org.junit.jupiter.api.Test +import org.objenesis.strategy.StdInstantiatorStrategy + +class ScholixGenerationTest { + + @Test + def generateScholix(): Unit = { + + val spark: SparkSession = SparkSession.builder().master("local[*]").getOrCreate() + val app = new SparkCreateScholexplorerDump(null, null, null) +// app.generateScholixResource("/home/sandro/Downloads/scholix_sample/", "/home/sandro/Downloads/scholix/", spark) +// app.generateBidirectionalRelations( +// "/home/sandro/Downloads/scholix_sample/", +// "/home/sandro/Downloads/scholix/", +// spark +// ) + app.generateScholix("/home/sandro/Downloads/scholix/", spark) + + } +} diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PayloadConverterJob.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PayloadConverterJob.java index d46ab1404..351526336 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PayloadConverterJob.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/PayloadConverterJob.java @@ -11,6 +11,7 @@ import java.util.Optional; import org.apache.commons.io.IOUtils; import org.apache.spark.SparkConf; import org.apache.spark.SparkContext; +import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.*; import org.apache.spark.util.LongAccumulator; @@ -29,6 +30,8 @@ import eu.dnetlib.dhp.oa.provision.model.ProvisionModelSupport; import eu.dnetlib.dhp.oa.provision.model.TupleWrapper; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; +import eu.dnetlib.dhp.schema.oaf.DataInfo; +import eu.dnetlib.dhp.schema.oaf.Oaf; import eu.dnetlib.dhp.schema.solr.SolrRecord; import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; @@ -115,6 +118,12 @@ public class PayloadConverterJob { .read() .load(toSeq(paths)) .as(Encoders.kryo(JoinedEntity.class)) + .filter( + (FilterFunction) je -> !Optional + .ofNullable(je.getEntity()) + .map(Oaf::getDataInfo) + .map(DataInfo::getDeletedbyinference) + .orElse(false)) .map( (MapFunction>) je -> new Tuple2<>( recordFactory.build(je, validateXML), diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java index da3915aee..a0692cb34 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/JoinedEntity.java @@ -5,14 +5,12 @@ import java.io.Serializable; import java.util.LinkedList; import java.util.List; -import com.fasterxml.jackson.annotation.JsonSubTypes; -import com.fasterxml.jackson.annotation.JsonTypeInfo; - -import eu.dnetlib.dhp.schema.common.EntityType; import eu.dnetlib.dhp.schema.oaf.*; public class JoinedEntity implements Serializable { + private static final long serialVersionUID = -6337458773099581114L; + private OafEntity entity; private List links; diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java index 66a920231..4a2326453 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/model/ProvisionModelSupport.java @@ -30,12 +30,14 @@ import eu.dnetlib.dhp.schema.solr.Context; import eu.dnetlib.dhp.schema.solr.Country; import eu.dnetlib.dhp.schema.solr.Datasource; import eu.dnetlib.dhp.schema.solr.EoscIfGuidelines; +import eu.dnetlib.dhp.schema.solr.ExternalReference; import eu.dnetlib.dhp.schema.solr.Instance; import eu.dnetlib.dhp.schema.solr.Journal; import eu.dnetlib.dhp.schema.solr.Measure; import eu.dnetlib.dhp.schema.solr.OpenAccessColor; import eu.dnetlib.dhp.schema.solr.OpenAccessRoute; import eu.dnetlib.dhp.schema.solr.Organization; +import eu.dnetlib.dhp.schema.solr.Pid; import eu.dnetlib.dhp.schema.solr.Project; import eu.dnetlib.dhp.schema.solr.Result; import eu.dnetlib.dhp.schema.solr.Subject; @@ -375,7 +377,7 @@ public class ProvisionModelSupport { rs.setIsInDiamondJournal(r.getIsInDiamondJournal()); rs.setPubliclyFunded(r.getPubliclyFunded()); rs.setTransformativeAgreement(r.getTransformativeAgreement()); - + rs.setExternalReference(mapExternalReference(r.getExternalReference())); rs.setInstance(mapInstances(r.getInstance())); if (r instanceof Publication) { @@ -561,6 +563,27 @@ public class ProvisionModelSupport { .orElse(null); } + private static List mapExternalReference( + List externalReference) { + return Optional + .ofNullable(externalReference) + .map( + ext -> ext + .stream() + .map( + e -> ExternalReference + .newInstance( + e.getSitename(), + e.getLabel(), + e.getAlternateLabel(), + e.getUrl(), + mapCodeLabel(e.getQualifier()), + e.getRefidentifier(), + e.getQuery())) + .collect(Collectors.toList())) + .orElse(Lists.newArrayList()); + } + private static List asContext(List ctxList, ContextMapper contextMapper) { @@ -579,7 +602,7 @@ public class ProvisionModelSupport { } return Optional - .ofNullable(contexts) + .of(contexts) .map( ctx -> ctx .stream() diff --git a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java index 493c3db02..3c8f5cef5 100644 --- a/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java +++ b/dhp-workflows/dhp-graph-provision/src/main/java/eu/dnetlib/dhp/oa/provision/utils/XmlRecordFactory.java @@ -216,6 +216,13 @@ public class XmlRecordFactory implements Serializable { if (entity.getMeasures() != null) { metadata.addAll(measuresAsXml(entity.getMeasures())); } + if (entity.getContext() != null) { + contexts.addAll(entity.getContext().stream().map(Context::getId).collect(Collectors.toList())); + /* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */ + if (contexts.contains("dh-ch::subcommunity::2")) { + contexts.add("clarin"); + } + } if (ModelSupport.isResult(type)) { final Result r = (Result) entity; @@ -242,14 +249,6 @@ public class XmlRecordFactory implements Serializable { .collect(Collectors.toList())); } - if (r.getContext() != null) { - contexts.addAll(r.getContext().stream().map(c -> c.getId()).collect(Collectors.toList())); - /* FIXME: Workaround for CLARIN mining issue: #3670#note-29 */ - if (contexts.contains("dh-ch::subcommunity::2")) { - contexts.add("clarin"); - } - } - if (r.getTitle() != null) { metadata .addAll( @@ -1601,9 +1600,7 @@ public class XmlRecordFactory implements Serializable { private List buildContexts(final String type, final Set contexts) { final List res = Lists.newArrayList(); - if (contextMapper != null - && !contextMapper.isEmpty() - && MainEntityType.result.toString().equals(type)) { + if (contextMapper != null && !contextMapper.isEmpty()) { XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot"); diff --git a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java index b738498e2..dcd021db1 100644 --- a/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java +++ b/dhp-workflows/dhp-graph-provision/src/test/java/eu/dnetlib/dhp/oa/provision/XmlRecordFactoryTest.java @@ -1,8 +1,7 @@ package eu.dnetlib.dhp.oa.provision; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.*; import java.io.IOException; import java.io.StringReader; @@ -22,6 +21,7 @@ import com.google.common.collect.Lists; import eu.dnetlib.dhp.oa.provision.model.JoinedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntity; import eu.dnetlib.dhp.oa.provision.model.RelatedEntityWrapper; +import eu.dnetlib.dhp.oa.provision.utils.ContextDef; import eu.dnetlib.dhp.oa.provision.utils.ContextMapper; import eu.dnetlib.dhp.oa.provision.utils.XmlRecordFactory; import eu.dnetlib.dhp.schema.oaf.*; @@ -51,7 +51,7 @@ public class XmlRecordFactoryTest { assertNotNull(doc); - // System.out.println(doc.asXML()); + System.out.println(doc.asXML()); assertEquals("0000-0001-9613-6638", doc.valueOf("//creator[@rank = '1']/@orcid")); assertEquals("0000-0001-9613-6639", doc.valueOf("//creator[@rank = '1']/@orcid_pending")); @@ -269,4 +269,39 @@ public class XmlRecordFactoryTest { } + @Test + public void test_AKA_project() throws DocumentException, IOException { + final ContextMapper contextMapper = new ContextMapper(); + + contextMapper + .put("dh-ch", new ContextDef("dh-ch", "Digital Humanities and Cultural Heritage", "context", "community")); + contextMapper.put("dh-ch::projects", new ContextDef("dh-ch::projects", "DH-CH Projects", "category", "")); + contextMapper + .put("dh-ch::projects::2", new ContextDef("dh-ch::projects::2", "ARIADNE", "concept", "community")); + + final XmlRecordFactory xmlRecordFactory = new XmlRecordFactory(contextMapper, false, + PayloadConverterJob.schemaLocation); + + final Project p = OBJECT_MAPPER + .readValue( + IOUtils.toString(getClass().getResourceAsStream("project_aka.json")), + Project.class); + + assertNotNull(p.getContext()); + assertEquals(1, p.getContext().size()); + assertEquals("dh-ch::projects::2", p.getContext().get(0).getId()); + + final String xml = xmlRecordFactory.build(new JoinedEntity(p)); + + assertNotNull(xml); + + final Document doc = new SAXReader().read(new StringReader(xml)); + + assertNotNull(doc); + + assertEquals("dh-ch", doc.valueOf("//context/@id")); + assertEquals("dh-ch::projects", doc.valueOf("//context/category/@id")); + assertEquals("dh-ch::projects::2", doc.valueOf("//context/category/concept/@id")); + } + } diff --git a/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/project_aka.json b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/project_aka.json new file mode 100644 index 000000000..18bd5adf4 --- /dev/null +++ b/dhp-workflows/dhp-graph-provision/src/test/resources/eu/dnetlib/dhp/oa/provision/project_aka.json @@ -0,0 +1 @@ +{"context" : [{"dataInfo":[{"deletedbyinference":false,"inferenceprovenance":"bulktagging","inferred":true,"invisible":false,"provenanceaction":{"classid":"bulktagging:community:subject","classname":"Bulk Tagging for Communities","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.9"}],"id":"dh-ch::projects::2"}], "callidentifier":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Virkapäätöksiin liittyvä yleiskustannusosuus KY"},"code":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"204684"},"collectedfrom":[{"key":"10|openaire____::6ac933301a3933c8a22ceebea7000326","value":"Academy of Finland"}],"currency":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"EUR"},"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"dateofcollection":"2019-01-25","dateoftransformation":"2022-02-08","duration":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"0"},"ecarticle29_3":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"ecsc39":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"enddate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2002-12-31"},"extraInfo":[],"fundedamount":3230.0,"fundingtree":[{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"\n aka_________::AKA\n AKA\n Academy of Finland\n Academy of Finland\n FI\n "}],"id":"40|aka_________::00f0012ac67a2f826f2e98dbdfd6b058","jsonextrainfo":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"{}"},"lastupdatetimestamp":1719997513126,"oamandatepublications":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"false"},"optional1":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"3,230 €"},"originalId":["aka_________::204684"],"pid":[],"startdate":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"2002-12-01"},"subjects":[],"title":{"dataInfo":{"deletedbyinference":false,"inferred":false,"invisible":false,"provenanceaction":{"classid":"sysimport:crosswalk:entityregistry","classname":"Harvested","schemeid":"dnet:provenanceActions","schemename":"dnet:provenanceActions"},"trust":"0.900"},"value":"Tutkijan virkaan liittyvä yleiskustannusosuus suorituspaikalle"},"totalcost":0.0} \ No newline at end of file diff --git a/dhp-workflows/dhp-stats-actionsets/pom.xml b/dhp-workflows/dhp-stats-actionsets/pom.xml index 3daa8f995..499c598f0 100644 --- a/dhp-workflows/dhp-stats-actionsets/pom.xml +++ b/dhp-workflows/dhp-stats-actionsets/pom.xml @@ -16,11 +16,11 @@ org.apache.spark - spark-core_2.11 + spark-core_${scala.binary.version} org.apache.spark - spark-sql_2.11 + spark-sql_${scala.binary.version} diff --git a/dhp-workflows/dhp-stats-hist-snaps/pom.xml b/dhp-workflows/dhp-stats-hist-snaps/pom.xml index b31d909f9..8961f919a 100644 --- a/dhp-workflows/dhp-stats-hist-snaps/pom.xml +++ b/dhp-workflows/dhp-stats-hist-snaps/pom.xml @@ -10,11 +10,11 @@ org.apache.spark - spark-core_2.11 + spark-core_${scala.binary.version} org.apache.spark - spark-sql_2.11 + spark-sql_${scala.binary.version} diff --git a/dhp-workflows/dhp-stats-monitor-irish/pom.xml b/dhp-workflows/dhp-stats-monitor-irish/pom.xml index 6ab19dced..600632364 100644 --- a/dhp-workflows/dhp-stats-monitor-irish/pom.xml +++ b/dhp-workflows/dhp-stats-monitor-irish/pom.xml @@ -10,11 +10,11 @@ org.apache.spark - spark-core_2.11 + spark-core_${scala.binary.version} org.apache.spark - spark-sql_2.11 + spark-sql_${scala.binary.version} diff --git a/dhp-workflows/dhp-stats-monitor-update/pom.xml b/dhp-workflows/dhp-stats-monitor-update/pom.xml index f2bc35f8d..86d5135fa 100644 --- a/dhp-workflows/dhp-stats-monitor-update/pom.xml +++ b/dhp-workflows/dhp-stats-monitor-update/pom.xml @@ -10,11 +10,11 @@ org.apache.spark - spark-core_2.11 + spark-core_${scala.binary.version} org.apache.spark - spark-sql_2.11 + spark-sql_${scala.binary.version} diff --git a/dhp-workflows/dhp-swh/pom.xml b/dhp-workflows/dhp-swh/pom.xml index 80fff4587..ef9049b20 100644 --- a/dhp-workflows/dhp-swh/pom.xml +++ b/dhp-workflows/dhp-swh/pom.xml @@ -51,49 +51,6 @@ hadoop-distcp - - eu.dnetlib - dnet-actionmanager-api - - - eu.dnetlib - dnet-actionmanager-common - - - eu.dnetlib - dnet-openaireplus-mapping-utils - - - saxonica - saxon - - - saxonica - saxon-dom - - - jgrapht - jgrapht - - - net.sf.ehcache - ehcache - - - org.springframework - spring-test - - - org.apache.* - * - - - apache - * - - - - org.apache.httpcomponents httpclient diff --git a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java index 2691d4b7e..230a077f7 100644 --- a/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java +++ b/dhp-workflows/dhp-swh/src/main/java/eu/dnetlib/dhp/swh/PrepareSWHActionsets.java @@ -17,6 +17,7 @@ import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.api.java.function.FilterFunction; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.*; import org.apache.spark.sql.Dataset; @@ -117,7 +118,7 @@ public class PrepareSWHActionsets { .map( (MapFunction) t -> OBJECT_MAPPER.readValue(t, Software.class), Encoders.bean(Software.class)) - .filter(t -> t.getCodeRepositoryUrl() != null) + .filter((FilterFunction) t -> t.getCodeRepositoryUrl() != null) .select(col("id"), col("codeRepositoryUrl.value").as("repoUrl")); } diff --git a/dhp-workflows/dhp-usage-raw-data-update/pom.xml b/dhp-workflows/dhp-usage-raw-data-update/pom.xml index a9dbb09ae..8ce9826e2 100644 --- a/dhp-workflows/dhp-usage-raw-data-update/pom.xml +++ b/dhp-workflows/dhp-usage-raw-data-update/pom.xml @@ -39,8 +39,8 @@ UTF-8 UTF-8 - 0.13.1-cdh5.2.1 - 2.5.0-cdh5.2.1 + 1.1.0-cdh5.16.2 + 2.6.0-cdh5.16.2 @@ -72,7 +72,13 @@ org.apache.hadoop hadoop-common ${cdh.hadoop.version} - + + + jdk.tools + jdk.tools + + + eu.dnetlib.dhp dhp-common diff --git a/dhp-workflows/dhp-usage-stats-build/pom.xml b/dhp-workflows/dhp-usage-stats-build/pom.xml index 56aec73b7..4dd987f51 100644 --- a/dhp-workflows/dhp-usage-stats-build/pom.xml +++ b/dhp-workflows/dhp-usage-stats-build/pom.xml @@ -39,8 +39,8 @@ UTF-8 UTF-8 - 0.13.1-cdh5.2.1 - 2.5.0-cdh5.2.1 + 1.1.0-cdh5.16.2 + 2.6.0-cdh5.16.2 @@ -67,11 +67,23 @@ org.apache.hive hive-jdbc ${cdh.hive.version} - + + + jdk.tools + jdk.tools + + + org.apache.hadoop hadoop-common ${cdh.hadoop.version} + + + jdk.tools + jdk.tools + + eu.dnetlib.dhp diff --git a/pom.xml b/pom.xml index 6428e3bda..666ba2350 100644 --- a/pom.xml +++ b/pom.xml @@ -1,955 +1,1101 @@ - 4.0.0 - eu.dnetlib.dhp - dhp - 1.2.5-SNAPSHOT - pom - - - - GNU Affero General Public License v3.0 or later - https://spdx.org/licenses/AGPL-3.0-or-later.html#licenseText - repo - This program is free software: you can redistribute it and/or modify it under the terms of the - GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the - License, or (at your option) any later version. - - - - - dhp-build - dhp-pace-core - dhp-common - dhp-workflows - - - - Redmine - https://support.openaire.eu/projects/openaire - - - - jenkins - https://jenkins-dnet.d4science.org/ - - - - scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git - scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git - https://code-repo.d4science.org/D-Net/dnet-hadoop/ - HEAD - - - This module is the root descriptor for the dnet-hadoop project - - - - - - - dnet45-releases - D-Net 45 releases - https://maven.d4science.org/nexus/content/repositories/dnet45-releases - default - - false - - - true - - - - dnet45-snapshots - D-Net 45 snapshots - https://maven.d4science.org/nexus/content/repositories/dnet45-snapshots - default - - true - - - false - - - - dnet45-bootstrap-snapshot - D-Net 45 Bootstrap Snapshot - https://maven.d4science.org/nexus/content/repositories/dnet45-bootstrap-snapshot/ - - false - - - true - - default - - - dnet45-bootstrap-release - D-Net 45 Bootstrap Release - https://maven.d4science.org/nexus/content/repositories/dnet45-bootstrap-release/ - - true - - - false - - default - - - cloudera - Cloudera Repository - https://repository.cloudera.com/artifactory/cloudera-repos - - true - - - false - - - - dnet-deps - dnet-dependencies - https://maven.d4science.org/nexus/content/repositories/dnet-deps - default - - - maven-restlet - Restlet repository - https://maven.restlet.talend.com - - - conjars - conjars - https://conjars.wensel.net/repo/ - - - - - - org.junit.jupiter - junit-jupiter - ${junit-jupiter.version} - test - - - - org.mockito - mockito-core - ${mockito-core.version} - test - - - - org.mockito - mockito-junit-jupiter - ${mockito-core.version} - test - - - - - - - - eu.dnetlib.dhp - ${dhp-schemas.artifact} - ${dhp-schemas.version} - - - org.apache.hadoop - hadoop-hdfs - ${dhp.hadoop.version} - provided - - - org.apache.hadoop - hadoop-common - ${dhp.hadoop.version} - provided - - - org.apache.hadoop - hadoop-client - ${dhp.hadoop.version} - provided - - - org.apache.hadoop - hadoop-distcp - ${dhp.hadoop.version} - provided - - - org.apache.spark - spark-core_${scala.binary.version} - ${dhp.spark.version} - provided - - - org.apache.spark - spark-sql_${scala.binary.version} - ${dhp.spark.version} - provided - - - org.apache.spark - spark-graphx_${scala.binary.version} - ${dhp.spark.version} - provided - - - org.apache.spark - spark-hive_${scala.binary.version} - ${dhp.spark.version} - test - - - - org.slf4j - jcl-over-slf4j - 1.7.25 - provided - - - - org.apache.commons - commons-lang3 - ${dhp.commons.lang.version} - - - - commons-validator - commons-validator - 1.7 - - - - com.github.sisyphsu - dateparser - 1.0.7 - - - - me.xuender - unidecode - 0.0.7 - - - - com.google.guava - guava - ${dhp.guava.version} - - - - - commons-codec - commons-codec - 1.9 - - - - commons-io - commons-io - 2.4 - - - - commons-cli - commons-cli - 1.2 - provided - - - - net.sf.saxon - Saxon-HE - 9.9.1-6 - - - - dom4j - dom4j - 1.6.1 - - - - xml-apis - xml-apis - 1.4.01 - - - - jaxen - jaxen - 1.1.6 - - - - com.mycila.xmltool - xmltool - 3.3 - - - - org.apache.solr - solr-solrj - ${solr.version} - - - * - * - - - - - com.lucidworks.spark - spark-solr - ${sparksolr.version} - - - * - * - - - - - org.apache.solr - solr-test-framework - ${solr.version} - test - - - io.dropwizard.metrics - metrics-core - 3.2.6 - test - - - - - org.apache.httpcomponents - httpclient - ${org.apache.httpcomponents.version} - - - org.apache.httpcomponents - httpmime - ${org.apache.httpcomponents.version} - - - org.noggit - noggit - 0.8 - - - org.apache.zookeeper - zookeeper - 3.4.11 - - - - net.schmizz - sshj - 0.10.0 - test - - - - com.fasterxml.jackson.core - jackson-core - ${dhp.jackson.version} - provided - - - - com.fasterxml.jackson.core - jackson-annotations - ${dhp.jackson.version} - provided - - - com.fasterxml.jackson.core - jackson-databind - ${dhp.jackson.version} - provided - - - - eu.dnetlib - dnet-actionmanager-common - ${dnet-actionmanager-common.version} - - - org.apache.hadoop - hadoop-common - - - - - eu.dnetlib - dnet-actionmanager-api - ${dnet-actionmanager-api.version} - - - eu.dnetlib - cnr-misc-utils - - - - - - eu.dnetlib - cnr-rmi-api - ${cnr-rmi-api.version} - - - - eu.dnetlib.dhp - dnet-openaire-broker-common - ${dnet-openaire-broker-common.version} - - - - org.apache.cxf - cxf-rt-transports-http - 3.1.5 - - - javax.persistence - javax.persistence-api - 2.2 - provided - - - - com.jayway.jsonpath - json-path - 2.4.0 - - - com.arakelian - java-jq - 0.10.1 - - - edu.cmu - secondstring - 1.0.0 - - - org.mongodb - mongo-java-driver - ${mongodb.driver.version} - - - io.fares.junit.mongodb - mongodb-junit-test - 1.1.0 - - - org.postgresql - postgresql - 42.2.10 - - - - org.antlr - stringtemplate - 3.2.1 - - - - org.antlr - ST4 - 4.3.4 - - - - com.ximpleware - vtd-xml - ${vtd.version} - - - - org.elasticsearch - elasticsearch-hadoop - 7.6.0 - - - - - org.apache.oozie - oozie-client - ${dhp.oozie.version} - provided - - - - slf4j-simple - org.slf4j - - - - - - - com.squareup.okhttp3 - okhttp - ${okhttp.version} - - - - org.apache.commons - commons-compress - ${common.compress.version} - - - - - org.apache.commons - commons-csv - ${common.csv.version} - - - - - - org.apache.poi - poi-ooxml - ${apache.poi.version} - - - - org.json - json - 20180813 - - - - org.json4s - json4s-jackson_${scala.binary.version} - ${json4s.version} - - - - com.github.victools - jsonschema-generator - ${jsonschemagenerator.version} - - - - org.apache.commons - commons-text - ${common.text.version} - - - - com.opencsv - opencsv - 5.5 - - - io.github.classgraph - classgraph - 4.8.71 - - - - com.fasterxml.jackson.dataformat - jackson-dataformat-xml - ${jackson.version} - provided - - - com.fasterxml.jackson.module - jackson-module-jsonSchema - ${jackson.version} - provided - - - - - org.apache.commons - commons-math3 - 3.6.1 - - - - - com.google.code.gson - gson - ${google.gson.version} - - - - commons-collections - commons-collections - ${commons.collections.version} - - - commons-logging - commons-logging - ${commons.logging.version} - - - - org.reflections - reflections - 0.9.10 - - - - org.scala-lang - scala-library - ${scala.version} - - - - com.ibm.icu - icu4j - 70.1 - - - - - - target - target/classes - ${project.artifactId}-${project.version} - target/test-classes - - - - org.apache.maven.plugins - maven-plugin-plugin - 3.3 - - - org.apache.maven.plugins - maven-project-info-reports-plugin - 3.0.0 - - - org.apache.maven.plugins - maven-site-plugin - 3.9.1 - - ${dhp.site.skip} - - - - - org.apache.maven.plugins - maven-compiler-plugin - ${maven.compiler.plugin.version} - - 1.8 - 1.8 - ${project.build.sourceEncoding} - - - - - org.apache.maven.plugins - maven-jar-plugin - 3.0.2 - - - - org.apache.maven.plugins - maven-source-plugin - 3.0.1 - - - attach-sources - verify - - jar-no-fork - - - - - - - org.apache.maven.plugins - maven-surefire-plugin - 3.0.0-M4 - - true - - - - org.apache.maven.plugins - maven-javadoc-plugin - 3.2.0 - - true - none - - - - org.apache.maven.plugins - maven-dependency-plugin - 3.6.0 - - - - net.revelc.code.formatter - formatter-maven-plugin - 2.11.0 - - - eu.dnetlib.dhp - dhp-code-style - ${project.version} - - - - - org.antipathy - mvn-scalafmt_${scala.binary.version} - 1.0.1640073709.733712b - - - eu.dnetlib.dhp - dhp-code-style - ${project.version} - - - - - - - - org.apache.maven.plugins - maven-site-plugin - - - org.apache.maven.plugins - maven-project-info-reports-plugin - - - net.revelc.code.formatter - formatter-maven-plugin - - - - format - - - eclipse/formatter_dnet.xml - - - - - - net.revelc.code - impsort-maven-plugin - 1.4.1 - - java.,javax.,org.,com. - java,* - - **/thrift/*.java - - - - - sort-imports - - sort - - - - - - org.antipathy - mvn-scalafmt_${scala.binary.version} - - https://code-repo.d4science.org/D-Net/dnet-hadoop/raw/branch/beta/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf - false - false - - ${project.basedir}/src/main/scala - - - ${project.basedir}/src/test/scala - - false - false - : git rev-parse --abbrev-ref HEAD - false - - - - validate - - format - - - - - - org.apache.maven.plugins - maven-release-plugin - 2.5.3 - - - org.jacoco - jacoco-maven-plugin - 0.7.9 - - - **/schemas/* - **/com/cloudera/**/* - **/org/apache/avro/io/**/* - - - - - default-prepare-agent - - prepare-agent - - - - default-report - prepare-package - - report - - - - - - - - - - org.apache.maven.wagon - wagon-ssh - 2.10 - - - - - - dnet45-snapshots - DNet45 Snapshots - https://maven.d4science.org/nexus/content/repositories/dnet45-snapshots - default - - - dnet45-releases - https://maven.d4science.org/nexus/content/repositories/dnet45-releases - - - DHPSite - ${dhp.site.stage.path}/ - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - - true - none - - - - - - - sftp://dnet-hadoop@static-web.d4science.org/dnet-hadoop - UTF-8 - UTF-8 - 3.6.0 - 1.8 - 1.8 - 2.22.2 - 2.0.1 - cdh5.9.2 - 2.6.0-${dhp.cdh.version} - 4.1.0-${dhp.cdh.version} - dhp-schemas - 3.6.0 - 2.4.0.cloudera2 - 2.9.6 - 3.5 - true - 11.0.2 - 2.11.12 - 2.11 - 1.3.0 - 5.6.1 - 3.3.3 - 3.4.2 - [2.12,3.0) - [6.1.3] - [4.0.3] - [6.0.5] - [3.1.6] - [2.6.1] - 7.5.0 - 4.7.2 - 1.20 - 3.5.3 - 4.13.0 - 1.8 - 4.1.2 - 1.8 - 4.5.3 - 4.0.1 - 2.2.2 - 1.1.3 - 3.2.1 - - - - - - scala-2.12 - - 2.12 - 2.12.18 - - - - 4.0.2 - 3.4.1 - 2.14.2 - 3.12.0 - 3.7.0-M11 - 4.8.1 - - - - - - - - arm-silicon-mac - - - aarch64 - mac - - - - - - org.xerial.snappy - snappy-java - 1.1.8.4 - - - - + 4.0.0 + eu.dnetlib.dhp + dhp + 1.2.5-SNAPSHOT + pom + + + + GNU Affero General Public License v3.0 or later + https://spdx.org/licenses/AGPL-3.0-or-later.html#licenseText + repo + This program is free software: you can redistribute it and/or modify it under the terms of the + GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + + + + + dhp-build + dhp-pace-core + dhp-common + dhp-workflows + dhp-shade-package + + + + Redmine + https://support.openaire.eu/projects/openaire + + + + jenkins + https://jenkins-dnet.d4science.org/ + + + + scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git + scm:git:gitea@code-repo.d4science.org:D-Net/dnet-hadoop.git + https://code-repo.d4science.org/D-Net/dnet-hadoop/ + HEAD + + + This module is the root descriptor for the dnet-hadoop project + + + + + + + + Openaire-third-parties-snaphot + Openaire third parties Snapshot + https://maven.d4science.org/nexus/content/repositories/Openaire-third-parties-snaphot/ + + false + + + true + + + + + dnet45-releases + D-Net 45 releases + https://maven.d4science.org/nexus/content/repositories/dnet45-releases + default + + false + + + true + + + + dnet45-snapshots + D-Net 45 snapshots + https://maven.d4science.org/nexus/content/repositories/dnet45-snapshots + default + + true + + + false + + + + dnet45-bootstrap-snapshot + D-Net 45 Bootstrap Snapshot + https://maven.d4science.org/nexus/content/repositories/dnet45-bootstrap-snapshot/ + + false + + + true + + default + + + dnet45-bootstrap-release + D-Net 45 Bootstrap Release + https://maven.d4science.org/nexus/content/repositories/dnet45-bootstrap-release/ + + true + + + false + + default + + + cloudera + Cloudera Repository + https://repository.cloudera.com/artifactory/cloudera-repos + + true + + + false + + + + dnet-deps + dnet-dependencies + https://maven.d4science.org/nexus/content/repositories/dnet-deps + default + + + maven-restlet + Restlet repository + https://maven.restlet.talend.com + + + conjars + conjars + https://conjars.wensel.net/repo/ + + + + + + + org.projectlombok + lombok + 1.18.28 + provided + + + org.junit.jupiter + junit-jupiter + ${junit-jupiter.version} + test + + + + org.mockito + mockito-core + ${mockito-core.version} + test + + + + org.mockito + mockito-junit-jupiter + ${mockito-core.version} + test + + + + + + + + eu.dnetlib.dhp + dhp-schemas + ${dhp-schemas.version} + + + org.apache.hadoop + hadoop-hdfs + ${dhp.hadoop.version} + provided + + + org.apache.hadoop + hadoop-common + ${dhp.hadoop.version} + provided + + + org.apache.hadoop + hadoop-client + ${dhp.hadoop.version} + provided + + + org.apache.hadoop + hadoop-distcp + ${dhp.hadoop.version} + provided + + + org.apache.spark + spark-core_${scala.binary.version} + ${dhp.spark.version} + provided + + + org.apache.spark + spark-sql_${scala.binary.version} + ${dhp.spark.version} + provided + + + org.apache.spark + spark-graphx_${scala.binary.version} + ${dhp.spark.version} + provided + + + org.apache.spark + spark-hive_${scala.binary.version} + ${dhp.spark.version} + test + + + + org.slf4j + slf4j-api + ${org.slf4j.version} + provided + + + + org.slf4j + slf4j-log4j12 + ${org.slf4j.version} + provided + + + + org.slf4j + jcl-over-slf4j + ${org.slf4j.version} + provided + + + + org.apache.logging.log4j + log4j-slf4j2-impl + ${log4j.version} + + + org.apache.logging.log4j + log4j-api + ${log4j.version} + + + org.apache.logging.log4j + log4j-core + ${log4j.version} + + + + org.apache.logging.log4j + log4j-1.2-api + ${log4j.version} + + + + org.apache.commons + commons-lang3 + ${dhp.commons.lang.version} + + + + org.apache.commons + commons-beanutils + ${commons-beanutils.version} + + + + + commons-validator + commons-validator + ${commons-validator.version} + + + + com.github.sisyphsu + dateparser + ${dateparser.version} + + + + me.xuender + unidecode + ${unidecode.version} + + + + com.google.guava + guava + ${dhp.guava.version} + + + + + commons-codec + commons-codec + ${commons-codec.version} + + + + commons-io + commons-io + ${commons-io.version} + + + + commons-cli + commons-cli + 1.2 + provided + + + + net.sf.saxon + Saxon-HE + 9.9.1-6 + + + + dom4j + dom4j + 1.6.1 + + + + xml-apis + xml-apis + 1.4.01 + + + + jaxen + jaxen + 1.1.6 + + + + com.mycila.xmltool + xmltool + 3.3 + + + + org.apache.solr + solr-solrj + ${solr.version} + + + * + * + + + + + com.lucidworks.spark + spark-solr + ${sparksolr.version} + + + * + * + + + + + org.apache.solr + solr-test-framework + ${solr.version} + test + + + io.dropwizard.metrics + metrics-core + 3.2.6 + test + + + + + org.apache.httpcomponents + httpclient + ${org.apache.httpcomponents.version} + + + org.apache.httpcomponents + httpmime + ${org.apache.httpcomponents.version} + + + org.noggit + noggit + 0.8 + + + org.apache.zookeeper + zookeeper + ${zookeeper.version} + + + + net.schmizz + sshj + 0.10.0 + test + + + + com.fasterxml.jackson.core + jackson-core + ${dhp.jackson.version} + provided + + + + com.fasterxml.jackson.core + jackson-annotations + ${dhp.jackson.version} + provided + + + com.fasterxml.jackson.core + jackson-databind + ${dhp.jackson.version} + provided + + + + eu.dnetlib + cnr-rmi-api + ${cnr-rmi-api.version} + + + + eu.dnetlib.dhp + dnet-openaire-broker-common + ${dnet-openaire-broker-common.version} + + + + org.apache.cxf + cxf-rt-transports-http + 3.1.5 + + + + javax.persistence + javax.persistence-api + 2.2 + provided + + + + com.jayway.jsonpath + json-path + 2.4.0 + + + com.arakelian + java-jq + 0.10.1 + + + edu.cmu + secondstring + 1.0.0 + + + org.mongodb + mongo-java-driver + ${mongodb.driver.version} + + + io.fares.junit.mongodb + mongodb-junit-test + 1.1.0 + + + org.postgresql + postgresql + 42.2.10 + + + + org.antlr + stringtemplate + 3.2.1 + + + + org.antlr + ST4 + 4.3.4 + + + + com.ximpleware + vtd-xml + ${vtd.version} + + + + org.elasticsearch + elasticsearch-hadoop + 7.6.0 + + + + + org.apache.oozie + oozie-client + ${dhp.oozie.version} + provided + + + + slf4j-simple + org.slf4j + + + + + + + com.squareup.okhttp3 + okhttp + ${okhttp.version} + + + + org.apache.commons + commons-compress + ${common.compress.version} + + + org.apache.commons + commons-csv + ${common.csv.version} + + + org.apache.poi + poi-ooxml + ${apache.poi.version} + + + + org.json + json + 20180813 + + + + org.json4s + json4s-jackson_${scala.binary.version} + ${json4s.version} + + + + com.github.victools + jsonschema-generator + ${jsonschemagenerator.version} + + + + org.apache.commons + commons-text + ${common.text.version} + + + + com.opencsv + opencsv + 5.5 + + + io.github.classgraph + classgraph + 4.8.71 + + + + com.fasterxml.jackson.dataformat + jackson-dataformat-xml + ${jackson.version} + provided + + + com.fasterxml.jackson.module + jackson-module-jsonSchema + ${jackson.version} + provided + + + + org.apache.commons + commons-math3 + 3.6.1 + + + + com.google.code.gson + gson + ${google.gson.version} + + + + commons-collections + commons-collections + ${commons.collections.version} + + + commons-logging + commons-logging + ${commons.logging.version} + + + + org.reflections + reflections + ${reflections.version} + + + + org.scala-lang + scala-library + ${scala.version} + + + + com.ibm.icu + icu4j + 70.1 + + + + org.javassist + javassist + ${javassist.version} + + + + + + target + target/classes + ${project.artifactId}-${project.version} + target/test-classes + + + + org.apache.maven.plugins + maven-plugin-plugin + 3.3 + + + org.apache.maven.plugins + maven-project-info-reports-plugin + 3.0.0 + + + org.apache.maven.plugins + maven-site-plugin + 3.9.1 + + ${dhp.site.skip} + + + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven.compiler.plugin.version} + + 1.8 + 1.8 + ${project.build.sourceEncoding} + + + + + org.apache.maven.plugins + maven-jar-plugin + 3.0.2 + + + + org.apache.maven.plugins + maven-source-plugin + 3.0.1 + + + attach-sources + verify + + jar-no-fork + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 3.0.0-M4 + + true + false + + + + org.apache.maven.plugins + maven-javadoc-plugin + 3.2.0 + + true + none + + + + org.apache.maven.plugins + maven-dependency-plugin + 3.6.0 + + + + net.revelc.code.formatter + formatter-maven-plugin + 2.11.0 + + + eu.dnetlib.dhp + dhp-code-style + ${project.version} + + + + + org.antipathy + mvn-scalafmt_${scala.binary.version} + 1.0.1640073709.733712b + + + eu.dnetlib.dhp + dhp-code-style + ${project.version} + + + + + + + + org.apache.maven.plugins + maven-site-plugin + + + org.apache.maven.plugins + maven-project-info-reports-plugin + + + net.revelc.code.formatter + formatter-maven-plugin + + + + format + + + eclipse/formatter_dnet.xml + + + + + + net.revelc.code + impsort-maven-plugin + 1.6.2 + + java.,javax.,org.,com. + java,* + + **/thrift/*.java + + + + + sort-imports + + sort + + + + + + org.antipathy + mvn-scalafmt_${scala.binary.version} + + + https://code-repo.d4science.org/D-Net/dnet-hadoop/raw/branch/beta/dhp-build/dhp-code-style/src/main/resources/scalafmt/scalafmt.conf + + false + false + + ${project.basedir}/src/main/scala + + + ${project.basedir}/src/test/scala + + false + false + : git rev-parse --abbrev-ref HEAD + false + + + + validate + + format + + + + + + org.apache.maven.plugins + maven-release-plugin + 2.5.3 + + + org.jacoco + jacoco-maven-plugin + 0.8.10 + + + **/schemas/* + **/com/cloudera/**/* + **/org/apache/avro/io/**/* + + + + + default-prepare-agent + + prepare-agent + + + + default-report + prepare-package + + report + + + + + + + + + + org.apache.maven.wagon + wagon-ssh + 2.10 + + + + + + dnet45-snapshots + DNet45 Snapshots + https://maven.d4science.org/nexus/content/repositories/dnet45-snapshots + default + + + dnet45-releases + https://maven.d4science.org/nexus/content/repositories/dnet45-releases + + + DHPSite + ${dhp.site.stage.path}/ + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + true + none + + + + + + + sftp://dnet-hadoop@static-web.d4science.org/dnet-hadoop + UTF-8 + UTF-8 + 1.8 + 1.8 + + + 2.11.12 + 2.11 + + + 3.6.0 + 2.22.2 + 2.0.1 + 4.0.1 + + + 4.1.2 + [2.6.1] + 1.20 + 1.8 + 1.8 + 1.9.4 + 1.9 + 3.2.1 + 2.4 + 1.1.3 + 1.7 + 1.0.7 + [7.0.1] + cdh5.9.2 + 3.5 + 11.0.2 + 2.6.0-${dhp.cdh.version} + 2.9.6 + 4.1.0-${dhp.cdh.version} + true + 2.4.0.cloudera2 + [3.1.6] + 2.2.2 + 1.2.17 + 3.19.0-GA + 3.5.3 + 4.13.0 + 5.6.1 + 3.3.3 + 3.4.2 + 4.7.2 + 4.5.3 + 1.7.25 + 0.9.10 + 1.3.0 + 7.5.0 + 3.6.0 + 0.0.7 + [2.12,3.0) + 3.4.6 + + + + + + + arm-silicon-mac + + + aarch64 + mac + + + + + + org.xerial.snappy + snappy-java + 1.1.8.4 + + + + + + spark-34 + + 2.12 + 2.12.18 + 1.3.0 + + + 4.8.1 + + + 1.22 + 1.8 + 1.10.0 + 1.9.4 + 1.15 + 3.2.2 + 2.11.0 + 1.1.3 + 1.7 + + 14.0.1 + 8.11.0 + 4.0.4 + 3.4.2.openaire + 2.14.2 + 3.12.0 + 2.19.0 + 3.7.0-M11 + 3.25.0-GA + 4.10.0 + 2.0.6 + 0.10.2 + 3.6.3 + + + + + spark-35 + + 2.12 + 2.12.18 + 1.3.0 + + + 4.8.1 + + + 1.23.0 + 1.8 + 1.10.0 + 1.9.4 + 1.16.0 + 3.2.2 + 2.13.0 + 1.1.3 + 1.7 + + 14.0.1 + 8.11.0 + 4.0.4 + 3.5.1.openaire-SNAPSHOT + 2.15.2 + 3.12.0 + 2.20.0 + 3.7.0-M11 + 3.25.0-GA + 4.10.0 + 2.0.7 + 0.10.2 + 3.6.3 + + + + + java11 + + [11 + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 3.0.0-M4 + + + --add-opens=java.base/java.lang=ALL-UNNAMED + --add-opens=java.base/java.lang.invoke=ALL-UNNAMED + --add-opens=java.base/java.lang.reflect=ALL-UNNAMED + --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED + --add-opens=java.base/java.nio=ALL-UNNAMED + --add-opens=java.base/java.util=ALL-UNNAMED + --add-opens=java.base/java.util.concurrent=ALL-UNNAMED + --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED + --add-opens=java.base/sun.nio.ch=ALL-UNNAMED + --add-opens=java.base/sun.nio.cs=ALL-UNNAMED + --add-opens=java.base/sun.security.action=ALL-UNNAMED + --add-opens=java.base/sun.util.calendar=ALL-UNNAMED + + true + false + + + + + + \ No newline at end of file