added michele notebook

master
Andrea Mannocci 3 years ago
parent 7b948bb29e
commit 3854e03d10

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -125,6 +125,7 @@ def main(input_filepath, output_filepath, external_filepath):
df['primary_email_domain'] = df[df.primary_email.notna()]['primary_email'].apply(lambda x: x.split('@')[1])
df['other_email_domains'] = df[df.other_emails.notna()]['other_emails'].apply(lambda x: extract_email_domains(x))
df['url_domains'] = df[df.urls.notna()]['urls'].apply(lambda x: extract_url_domains(x))
df['other_url_domains'] = df[df.other_urls.notna()]['other_urls'].apply(lambda x: extract_url_domains(x))
logger.info('Creating simple numeric columns')
df['n_emails'] = df.other_emails.str.len()

Loading…
Cancel
Save