added michele notebook
This commit is contained in:
parent
7b948bb29e
commit
3854e03d10
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -125,6 +125,7 @@ def main(input_filepath, output_filepath, external_filepath):
|
||||||
df['primary_email_domain'] = df[df.primary_email.notna()]['primary_email'].apply(lambda x: x.split('@')[1])
|
df['primary_email_domain'] = df[df.primary_email.notna()]['primary_email'].apply(lambda x: x.split('@')[1])
|
||||||
df['other_email_domains'] = df[df.other_emails.notna()]['other_emails'].apply(lambda x: extract_email_domains(x))
|
df['other_email_domains'] = df[df.other_emails.notna()]['other_emails'].apply(lambda x: extract_email_domains(x))
|
||||||
df['url_domains'] = df[df.urls.notna()]['urls'].apply(lambda x: extract_url_domains(x))
|
df['url_domains'] = df[df.urls.notna()]['urls'].apply(lambda x: extract_url_domains(x))
|
||||||
|
df['other_url_domains'] = df[df.other_urls.notna()]['other_urls'].apply(lambda x: extract_url_domains(x))
|
||||||
|
|
||||||
logger.info('Creating simple numeric columns')
|
logger.info('Creating simple numeric columns')
|
||||||
df['n_emails'] = df.other_emails.str.len()
|
df['n_emails'] = df.other_emails.str.len()
|
||||||
|
|
Loading…
Reference in New Issue