diff --git a/src/data/make_dataset.py b/src/data/make_dataset.py index 14dc3b1..365c0e5 100644 --- a/src/data/make_dataset.py +++ b/src/data/make_dataset.py @@ -127,7 +127,7 @@ def main(input_filepath, output_filepath): df.n_employment = df.n_employment.astype(pd.Int16Dtype()) logger.info('Dropping useless columns') - df = df.drop(['urls', 'other_emails'], axis=1) + df.drop(['other_emails'], axis=1, inplace=True) logger.info('Serializing the dataset in ./data/processed') n = 1000000