From f21a3f7d303a90b63ce08ff998c2a43a67a4f82d Mon Sep 17 00:00:00 2001 From: Andrea Mannocci Date: Thu, 22 Apr 2021 17:30:50 +0200 Subject: [PATCH] persisting full urls --- src/data/make_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data/make_dataset.py b/src/data/make_dataset.py index 14dc3b1..365c0e5 100644 --- a/src/data/make_dataset.py +++ b/src/data/make_dataset.py @@ -127,7 +127,7 @@ def main(input_filepath, output_filepath): df.n_employment = df.n_employment.astype(pd.Int16Dtype()) logger.info('Dropping useless columns') - df = df.drop(['urls', 'other_emails'], axis=1) + df.drop(['other_emails'], axis=1, inplace=True) logger.info('Serializing the dataset in ./data/processed') n = 1000000