dnet-hadoop/dhp-workflows/dhp-dedup/src/main/resources/eu/dnetlib/dhp/dedup/oozie_app/config-default.xml

31 lines
23 KiB
XML
Raw Normal View History

2019-12-06 13:38:00 +01:00
<configuration>
<property>
<name>jobTracker</name>
<dedupId>yarnRM</dedupId>
2019-12-06 13:38:00 +01:00
</property>
<property>
<name>nameNode</name>
<dedupId>hdfs://nameservice1</dedupId>
2019-12-06 13:38:00 +01:00
</property>
<property>
<name>oozie.use.system.libpath</name>
<dedupId>true</dedupId>
2019-12-06 13:38:00 +01:00
</property>
<property>
<name>oozie.action.sharelib.for.spark</name>
<dedupId>spark2</dedupId>
2019-12-06 13:38:00 +01:00
</property>
<property>
<name>hive_metastore_uris</name>
<dedupId>thrift://iis-cdh5-test-m3.ocean.icm.edu.pl:9083</dedupId>
2019-12-06 13:38:00 +01:00
</property>
<property>
<name>hive_db_name</name>
<dedupId>openaire</dedupId>
2019-12-06 13:38:00 +01:00
</property>
<property>
<name>dedupConf</name>
<dedupId>{"wf":{"threshold":"0.9","dedupRun":"001","entityType":"organization","orderField":"legalname","queueMaxSize":"2000","groupMaxSize":"50","slidingWindowSize":"200","idPath":".id","rootBuilder":["organization","projectOrganization_participation_isParticipant","datasourceOrganization_provision_isProvidedBy"],"includeChildren":"true"},"pace":{"clustering":[{"name":"sortedngrampairs","fields":["legalname"],"params":{"max":2,"ngramLen":"3"}},{"name":"suffixprefix","fields":["legalname"],"params":{"max":1,"len":"3"}},{"name":"urlclustering","fields":["websiteurl"],"params":{}},{"name":"keywordsclustering","fields":["legalname"],"params":{"max":2,"windowSize":4}}],"strictConditions":[{"name":"exactMatch","fields":["gridid"]}],"conditions":[{"name":"DomainExactMatch","fields":["websiteurl"]},{"name":"exactMatch","fields":["country"]}],"model":[{"name":"country","algo":"Null","type":"String","weight":"0","ignoreMissing":"false","path":".country.classid"},{"name":"legalshortname","algo":"JaroWinklerNormalizedName","type":"String","weight":"0.1","ignoreMissing":"true","path":".legalshortname.dedupId"},{"name":"legalname","algo":"JaroWinklerNormalizedName","type":"String","weight":"0.9","ignoreMissing":"false","path":".legalname.dedupId","params":{"windowSize":4,"threshold":0.7}},{"name":"websiteurl","algo":"Null","type":"URL","weight":"0","ignoreMissing":"true","path":".websiteurl.dedupId","params":{"host":0.5,"path":0.5}},{"name":"gridid","algo":"Null","type":"String","weight":"0.0","ignoreMissing":"true","path":".pid[] | select(.qualifier.classid==\"grid\") | .dedupId"}],"blacklists":{"legalname":[]},"synonyms":{"key::1":["university","università","università studi","universitario","universitaria","université","universitaire","universitaires","universidad","universitade","Universität","universitaet","Uniwersytet","университет","universiteit","πανεπιστήμιο","universitesi","universiteti"],"key::2":["studies","studi","études","estudios","estudos","Studien","studia","исследования","studies","σπουδές"],"key::3":["advanced","superiore","supérieur","supérieure","supérieurs","supérieures","avancado","avancados","fortgeschrittene","fortgeschritten","zaawansowany","передовой","gevorderd","gevorderde","προχωρημένος","προχωρημένη","προχωρημένο","προχωρημένες","προχωρημένα","wyzsza"],"key::4":["institute","istituto","institut","instituto","instituto","Institut","instytut","институт","instituut","ινστιτούτο"],"key::5":["hospital","ospedale","hôpital","hospital","hospital","Krankenhaus","szpital","больница","ziekenhuis","νοσοκομείο"],"key::6":["research","ricerca","recherche","investigacion","pesquisa","Forschung","badania","исследования","onderzoek","έρευνα","erevna","erevnas"],"key::7":["college","collegio","université","colegio","faculdade","Hochschule","Szkoła Wyższa","Высшая школа","universiteit","κολλέγιο"],"key::8":["foundation","fondazione","fondation","fundación","fundação","Stiftung","Fundacja","фонд","stichting","ίδρυμα","idryma"],"key::9":["center","centro","centre","centro","centro","zentrum","centrum","центр","centrum","κέντρο"],"key::10":["national","nazionale","national","nationale","nationaux","nationales","nacional","nacional","national","krajowy","национальный","nationaal","nationale","εθνικό"],"key::11":["association","associazione","association","asociación","associação","Verein","verband","stowarzyszenie","ассоциация","associatie"],"key::12":["society","societa","société","sociedad","sociedade","gesellschaft","społeczeństwo","общество","maatschappij","κοινωνία"],"key::13":["international","internazionale","international","internacional","internacional","international","międzynarodowy","Международный","internationaal","internationale","διεθνής","διεθνή","διεθνές"],"key::14":["community","comunita","communauté
2019-12-06 13:38:00 +01:00
</property>
</configuration>