[Explore Service]: resultLanding.component.ts: Added "checkIfAllowed()" method, to be called by the sitemap script and by the updated "addNoIndexFilter()" method | Added spam words to "title_authors_words" | Added "publicCommunities" array listing the public communities in production - results from these communities are marked noindex.

This commit is contained in:
Konstantina Galouni 2021-08-06 14:10:42 +03:00
parent fa5da35853
commit b4b3e143bb
1 changed files with 46 additions and 36 deletions

View File

@ -640,67 +640,77 @@ export class ResultLandingComponent {
this.citeModal.open();
}
/**
 * Sets the robots meta tag to "noindex" when the current result does not
 * pass checkIfAllowed(); leaves the tag untouched otherwise.
 */
private addNoIndexFilter() {
  if (this.checkIfAllowed(this.resultLandingInfo)) {
    // Result is allowed: nothing to change.
    return;
  }
  this._meta.updateTag({content: 'noindex'}, "name='robots'");
}
/**
 * Decides whether the given result may be indexed by search engines.
 *
 * Returns true outright when the environment is neither "production" nor
 * "development". Otherwise the result is rejected (false) when any of these
 * filters trips:
 *  - spam keywords found in the title, the joined author names or the
 *    description, but only for records whose publisher is "zenodo" or whose
 *    hostedBy/collectedFrom download name contains "zenodo";
 *  - a title, description or author matching one of the listed placeholder
 *    values (isKeyword is defined elsewhere; presumably an exact match - confirm);
 *  - the community filter, which depends on properties.adminToolsPortalType.
 * Any exception is logged and treated as "not allowed" (returns false).
 *
 * NOTE(review): this span is a rendered diff hunk in which removed and added
 * lines appear interleaved without +/- markers, e.g. `return ;` directly
 * followed by `return true;`. Lines using `this.resultLandingInfo` appear to
 * be the pre-change version of the neighbouring lines that use the
 * `resultLandingInfo` parameter - confirm against the actual file before
 * editing.
 *
 * @param resultLandingInfo the result whose metadata is inspected
 * @returns true when indexing is allowed, false otherwise
 */
checkIfAllowed(resultLandingInfo: ResultLandingInfo) {
// Community ids consulted by the community filter below when the portal type is 'explore'.
let publicCommunities: string[] = [ "aginfra", "rural-digital-europe", "covid-19", "dariah", "dh-ch",
"enermaps", "mes", "fam", "ni", "sdsn-gr", "elixir-gr", "beopen"];
try {
if(!(this.properties.environment == "production" || this.properties.environment == "development") ) {
return ;
return true;
}else {
/*
//comment out poor content filters
let allow = !!(!this.resultLandingInfo.underCurationMessage &&
((this.resultLandingInfo.fundedByProjects && this.resultLandingInfo.fundedByProjects.length > 0)
|| this.resultLandingInfo.journal
|| (this.resultLandingInfo.classifiedSubjects && this.resultLandingInfo.classifiedSubjects.size > 0)
//allow free text keywords
|| (this.resultLandingInfo.otherSubjects && this.resultLandingInfo.otherSubjects.size > 0)
|| (this.resultLandingInfo.subjects && this.resultLandingInfo.subjects.length > 0)
/*
//comment out poor content filters
let allow = !!(!resultLandingInfo.underCurationMessage &&
((resultLandingInfo.fundedByProjects && resultLandingInfo.fundedByProjects.length > 0)
|| resultLandingInfo.journal
|| (resultLandingInfo.classifiedSubjects && resultLandingInfo.classifiedSubjects.size > 0)
//allow free text keywords
|| (resultLandingInfo.otherSubjects && resultLandingInfo.otherSubjects.size > 0)
|| (resultLandingInfo.subjects && resultLandingInfo.subjects.length > 0)
|| (this.resultLandingInfo.organizations && this.resultLandingInfo.organizations.length > 0)
|| this.resultLandingInfo.bioentities || (this.resultLandingInfo.references && this.resultLandingInfo.references.length > 0)
|| (this.resultLandingInfo.relatedResearchResults && this.resultLandingInfo.relatedResearchResults.length > 0)
|| (this.resultLandingInfo.similarResearchResults && this.resultLandingInfo.similarResearchResults.length > 0)
|| (this.resultLandingInfo.supplementaryResearchResults && this.resultLandingInfo.supplementaryResearchResults.length > 0)
|| (this.resultLandingInfo.supplementedByResearchResults && this.resultLandingInfo.supplementedByResearchResults.length > 0)
)
);*/
let allow = true;
// console.log("rich content " + allow)
//spam words to exclude
let title_authors_words = ["movie","hd","film","kimetsu", "1080p","4k","call of duty", "mobile hack", "TUBYDI", "电影","電影","download ebook","download [ebook]"];
|| (resultLandingInfo.organizations && resultLandingInfo.organizations.length > 0)
|| resultLandingInfo.bioentities || (resultLandingInfo.references && resultLandingInfo.references.length > 0)
|| (resultLandingInfo.relatedResearchResults && resultLandingInfo.relatedResearchResults.length > 0)
|| (resultLandingInfo.similarResearchResults && resultLandingInfo.similarResearchResults.length > 0)
|| (resultLandingInfo.supplementaryResearchResults && resultLandingInfo.supplementaryResearchResults.length > 0)
|| (resultLandingInfo.supplementedByResearchResults && resultLandingInfo.supplementedByResearchResults.length > 0)
)
);*/
let allow = true;
// console.log("rich content " + allow)
//spam words to exclude
let title_authors_words = ["movie","hd","film","kimetsu", "1080p","4k","call of duty", "mobile hack", "TUBYDI", "电影","電影","download ebook","download [ebook]","Düşük Hapı"];
let abstract_words = ["operacao-feliz-natal.blogspot.com", "moviedouban.site", "hack-expert-solution.link"];
// A spam keyword hit only blocks the result when the record is also tied to Zenodo
// (publisher equals "zenodo", or a hostedBy/collectedFrom download name contains it).
allow = allow && !(
(this.hasKeyword(this.resultLandingInfo.title,title_authors_words) || (this.resultLandingInfo.authors && this.hasKeyword(this.resultLandingInfo.authors.map(o => o.fullName).join(" "),title_authors_words))
|| (this.resultLandingInfo.description && this.hasKeyword(this.resultLandingInfo.description,abstract_words))
(this.hasKeyword(resultLandingInfo.title,title_authors_words) || (resultLandingInfo.authors && this.hasKeyword(resultLandingInfo.authors.map(o => o.fullName).join(" "),title_authors_words))
|| (resultLandingInfo.description && this.hasKeyword(resultLandingInfo.description,abstract_words))
) &&
((this.resultLandingInfo.publisher && this.resultLandingInfo.publisher.toLowerCase() == "zenodo") ||
this.resultLandingInfo.hostedBy_collectedFrom.filter( value => {return value.downloadName && value.downloadName.toLowerCase().indexOf("zenodo")!=-1}).length > 0));
((resultLandingInfo.publisher && resultLandingInfo.publisher.toLowerCase() == "zenodo") ||
resultLandingInfo.hostedBy_collectedFrom.filter( value => {return value.downloadName && value.downloadName.toLowerCase().indexOf("zenodo")!=-1}).length > 0));
// console.log("spam content " + allow)
//common titles/ description / authors
let common_titles = ["introduction", "editorial", "book reviews", "preface", "reviews", "none", "book review", "foreword", "conclusion", "review", "reply","einleitung","short notices","erratum","discussion", "letters to the editor","letter to the editor","reviews of books",":{unav)","editorial board"];
let common_abstract = ["international audience","n/a","peer reviewed","national audience","info:eu-repo/semantics/published","-",".","graphical abstract","met lit. opg","international audience; no abstract",'<jats:p>.</jats:p>',"politics","info:eu-repo/semantics/publishedversion","copia digital. madrid : ministerio de educación, cultura y deporte, 2016",'<jats:p />',"peer-reviewed","copia digital. madrid : ministerio de educación, cultura y deporte. subdirección general de coordinación bibliotecaria, 2015","<jats:p>-</jats:p>","imperial users only","yüksek lisans"];
let common_authors = ["[s.n.]","null &na;","nn","(:unap)","(:null)","null anonymous","anonymous"];
// Author names are joined and wrapped with "_" so hasKeyword only matches a whole
// author name delimited by "_" on both sides.
allow = allow && !(
this.isKeyword(this.resultLandingInfo.title,common_titles) || this.isKeyword(this.resultLandingInfo.description,common_abstract) ||
(this.resultLandingInfo.authors && this.hasKeyword("_"+this.resultLandingInfo.authors.map(o => o.fullName).join("_")+"_",common_authors, "_"))
this.isKeyword(resultLandingInfo.title,common_titles) || this.isKeyword(resultLandingInfo.description,common_abstract) ||
(resultLandingInfo.authors && this.hasKeyword("_"+resultLandingInfo.authors.map(o => o.fullName).join("_")+"_",common_authors, "_"))
);
// console.log("common content " + allow)
//community filter
// 'explore' portal: pass when the result has no contexts or none of them is in publicCommunities.
// 'community' portal: pass only when one context id equals the community derived from properties.domain.
// Any other portal type passes.
allow = allow && ((properties.adminToolsPortalType == 'explore' && (!this.resultLandingInfo.contexts || this.resultLandingInfo.contexts.length == 0)) ||
allow = allow && ((properties.adminToolsPortalType == 'explore' &&
(!resultLandingInfo.contexts || resultLandingInfo.contexts.length == 0 ||
resultLandingInfo.contexts.filter( context => { return publicCommunities.includes(context.idContext) }).length == 0)) ||
(properties.adminToolsPortalType != 'explore' && properties.adminToolsPortalType != 'community') ||
(properties.adminToolsPortalType == 'community' && (!!this.resultLandingInfo.contexts &&
this.resultLandingInfo.contexts.filter( context => { return ConnectHelper.getCommunityFromDomain(properties.domain) == context.idContext }).length > 0)
(properties.adminToolsPortalType == 'community' && (!!resultLandingInfo.contexts &&
resultLandingInfo.contexts.filter( context => { return ConnectHelper.getCommunityFromDomain(properties.domain) == context.idContext }).length > 0)
)
);
// console.log("community " + allow )
if(!allow) {
this._meta.updateTag({content: 'noindex'}, "name='robots'");
}
return allow;
}
} catch (e) {
// Failures are logged and the result is treated as not allowed.
// NOTE(review): the log text still names passNoIndexFilter(), not checkIfAllowed().
console.error("Error in passNoIndexFilter()", this.resultLandingInfo.relcanId, e);
console.error("Error in passNoIndexFilter()", resultLandingInfo.relcanId, e);
return false;
}
}
/**
 * Tells whether `value` contains at least one of `words`, ignoring case.
 *
 * Both the value and each keyword are lower-cased before comparing, so
 * keywords written with capitals (e.g. "TUBYDI") can actually match; the
 * previous version lower-cased only the value and silently skipped them.
 *
 * @param value text to search; an empty or missing value never matches
 * @param words candidate keywords
 * @param wordSuffix optional delimiter placed on both sides of each keyword
 *        before searching, so only occurrences surrounded by it match
 * @returns true when some keyword occurs in the value, false otherwise
 */
private hasKeyword(value:string, words:string[], wordSuffix = ""): boolean {
  if (!value) {
    // Always a strict boolean (the old code returned the number 0 here).
    return false;
  }
  const haystack = value.toLowerCase();
  return words.some(word => haystack.includes((wordSuffix + word + wordSuffix).toLowerCase()));
}