blob: 8e9d49dd8670c15e44f1042db60eeda38ef58908 [file] [log] [blame]
#!/usr/bin/env python
# coding=utf-8
"""AcquisitorAndCleaner engine action.
Use this module to add the project main code.
"""
from .._compatibility import six
from .._logging import get_logger
from marvin_python_toolbox.engine_base import EngineBaseDataHandler
from marvin_python_toolbox.common.data import MarvinData
import pandas as pd
__all__ = ['AcquisitorAndCleaner']
logger = get_logger('acquisitor_and_cleaner')
class AcquisitorAndCleaner(EngineBaseDataHandler):
def __init__(self, **kwargs):
super(AcquisitorAndCleaner, self).__init__(**kwargs)
def execute(self, params, **kwargs):
data_file = MarvinData.download_file("https://s3.amazonaws.com/marvin-engines-data/spam.csv")
data = pd.read_csv(data_file, encoding='latin-1')
data = data.drop(["Unnamed: 2", "Unnamed: 3", "Unnamed: 4"], axis=1)
data = data.rename(columns={"v1": "label", "v2": "text"})
data['label_num'] = data.label.map({'ham': 0, 'spam': 1})
self.marvin_initial_dataset = data