persisting full urls

master
Andrea Mannocci 3 years ago
parent 1796052086
commit f21a3f7d30

@@ -127,7 +127,7 @@ def main(input_filepath, output_filepath):
df.n_employment = df.n_employment.astype(pd.Int16Dtype())
logger.info('Dropping useless columns')
-df = df.drop(['urls', 'other_emails'], axis=1)
+df.drop(['other_emails'], axis=1, inplace=True)
logger.info('Serializing the dataset in ./data/processed')
n = 1000000

Loading…
Cancel
Save