Persist full URLs (stop dropping the `urls` column)

This commit is contained in:
Andrea Mannocci 2021-04-22 17:30:50 +02:00
parent 1796052086
commit f21a3f7d30
1 changed file with 1 addition and 1 deletion

View File

@@ -127,7 +127,7 @@ def main(input_filepath, output_filepath):
df.n_employment = df.n_employment.astype(pd.Int16Dtype()) df.n_employment = df.n_employment.astype(pd.Int16Dtype())
logger.info('Dropping useless columns') logger.info('Dropping useless columns')
df = df.drop(['urls', 'other_emails'], axis=1) df.drop(['other_emails'], axis=1, inplace=True)
logger.info('Serializing the dataset in ./data/processed') logger.info('Serializing the dataset in ./data/processed')
n = 1000000 n = 1000000