persisting full urls
This commit is contained in:
parent
1796052086
commit
f21a3f7d30
|
@ -127,7 +127,7 @@ def main(input_filepath, output_filepath):
|
|||
df.n_employment = df.n_employment.astype(pd.Int16Dtype())
|
||||
|
||||
logger.info('Dropping useless columns')
|
||||
df = df.drop(['urls', 'other_emails'], axis=1)
|
||||
df.drop(['other_emails'], axis=1, inplace=True)
|
||||
|
||||
logger.info('Serializing the dataset in ./data/processed')
|
||||
n = 1000000
|
||||
|
|
Loading…
Reference in New Issue