updates abbr
This commit is contained in:
parent
fbf55b3d5d
commit
b38be012a0
|
@ -186,10 +186,21 @@ protected_phrases1 = [
|
|||
|
||||
|
||||
|
||||
replacements = {'uni versity':'university',
|
||||
replacements = {'nat.':'national',
|
||||
'uni versity':'university',
|
||||
'inst ':'institute ',
|
||||
'adv ':'advanced ',
|
||||
'univ ':'university ',
|
||||
'stud ': 'studies ',
|
||||
'inst.':'institute',
|
||||
'adv.':'advanced',
|
||||
'univ.':'university',
|
||||
'stud.': 'studies',
|
||||
'uni versity':'university',
|
||||
'univ ':'university ',
|
||||
'univercity':'university',
|
||||
'universtiy':'university',
|
||||
'univerisity':'university',
|
||||
'univeristy':'university',
|
||||
'universirty':'university',
|
||||
'universiti':'university',
|
||||
|
@ -234,7 +245,7 @@ def substrings_dict(string):
|
|||
dict_string = {}
|
||||
index = 0
|
||||
for value in split_strings:
|
||||
|
||||
vaule = value.replace('.', ' ')
|
||||
# Check if the substring contains any university-related terms
|
||||
if not any(term in value.lower() for term in university_terms):
|
||||
# Apply regex substitutions for common patterns
|
||||
|
@ -286,7 +297,7 @@ def clean_string(input_string):
|
|||
|
||||
|
||||
# Remove every character that is not a Latin letter, whitespace, or allowed punctuation
|
||||
result = replace_comma_spaces(re.sub(r'[^a-zA-Z\s,;/]', '', result).strip())
|
||||
result = replace_comma_spaces(re.sub(r'[^a-zA-Z\s,;/.]', '', result).strip())
|
||||
|
||||
# Replace the placeholder with the " – " (en dash) sequence
|
||||
result = result.replace(placeholder, " – ")
|
||||
|
@ -308,7 +319,7 @@ def clean_string_facts(input_string):
|
|||
result = re.sub(r'\bsaint\b', 'st', result)
|
||||
|
||||
# Remove every character that is not a Latin letter, a digit, whitespace, or allowed punctuation
|
||||
result = re.sub(r'[^a-zA-Z0-9\s;/-]', '', result)
|
||||
result = re.sub(r'[^a-zA-Z0-9\s;/-.]', '', result)
|
||||
|
||||
# Replace consecutive whitespace with a single space
|
||||
result = re.sub(r'\s+', ' ', result)
|
||||
|
|
Loading…
Reference in New Issue