persisting full urls
This commit is contained in:
parent
1796052086
commit
f21a3f7d30
|
@ -127,7 +127,7 @@ def main(input_filepath, output_filepath):
|
||||||
df.n_employment = df.n_employment.astype(pd.Int16Dtype())
|
df.n_employment = df.n_employment.astype(pd.Int16Dtype())
|
||||||
|
|
||||||
logger.info('Dropping useless columns')
|
logger.info('Dropping useless columns')
|
||||||
df = df.drop(['urls', 'other_emails'], axis=1)
|
df.drop(['other_emails'], axis=1, inplace=True)
|
||||||
|
|
||||||
logger.info('Serializing the dataset in ./data/processed')
|
logger.info('Serializing the dataset in ./data/processed')
|
||||||
n = 1000000
|
n = 1000000
|
||||||
|
|
Loading…
Reference in New Issue