Make a decorator (send Slack message)


In [32]:
import requests
import json
import datetime
import time

def send_slack(channel, username, icon_emoji, message):
    
    # change base_url to your Slack incoming-webhook URL
    base_url = "https://hooks.slack.com/services/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"
    payload = {
        "channel": channel,
        "username": username,
        "icon_emoji": icon_emoji,
        "text": message
        
    }
    response = requests.post(base_url, data=json.dumps(payload))
    print(response.content)
    
def slack(function):
    def wrapper(*args, **kwargs):
        name = function.__name__
        start_time = time.time()
        current_time = str(datetime.datetime.now())
        send_slack("dss", "databot", ":ghost:", 
                   "Start {name} - {time}".format(name=name, time=current_time)
                  )
        result = function(*args, **kwargs)
        end_time = time.time()
        send_slack("dss", "databot", ":ghost:",
                   "End {name} - total time {time}s".format(name=name, time=int(end_time - start_time))
                  )
        return result

    return wrapper
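
To sanity-check the decorator, it can be applied to any function: it posts a start message, runs the function, and posts the elapsed time to the "dss" channel. A minimal sketch with a hypothetical do_nothing function (not part of the original notebook):

@slack
def do_nothing(seconds=2):
    # placeholder workload so the start and end messages are a few seconds apart
    time.sleep(seconds)
    return "done"

do_nothing()
# posts "Start do_nothing - <timestamp>" and "End do_nothing - total time 2s"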

Make a class for image processing


In [33]:
import PIL
from PIL import Image

import pandas as pd
import numpy as np

import os
import pickle
import shutil


class Image_processing():

    def __init__(self):
        self.path = os.curdir               # base directory; expects imgs/ and data/ beneath it
        self.img_size = [120, 90]           # resize target (width, height); docstring default (raw images are 640x480)
        self.img_cut = [10, 110, 10, 80]    # crop box [col_start, col_end, row_start, row_end] in the resized frame
        self.img_flatten = (self.img_cut[3]-self.img_cut[2]) * (self.img_cut[1]-self.img_cut[0])  # 70 * 100 = 7000 features
        
    def __image_processing(self, path):
        """
        grayscale & resize & cut image
        """
        img = Image.open(path)
        resize_img = img.resize(self.img_size, PIL.Image.ANTIALIAS)
        gray_img = resize_img.convert("L", dither=1)
        if self.img_cut:
            cut = self.img_cut
            cut_img = np.array(gray_img)[cut[2]:cut[3],cut[0]:cut[1]]
            return cut_img.ravel()
        return np.array(gray_img).ravel()
    
    def __make_train_dataframe(self, class_number):
        """
        make train_image dataframe
        """
        path = os.path.join(self.path, "imgs", "train")
        data = list(map(self.__image_processing,
                        [os.path.join(path, "c"+str(class_number), i) 
                         for i in os.listdir(os.path.join(path, "c"+str(class_number)))]
                       )
                   )
        df = pd.DataFrame(data, columns=["X"+str(i) 
                                         for i in range(self.img_flatten)])
        df["Y"] = class_number
        return df
    
    def __make_test_dataframe(self, number):
        """
        make test_image dataframe
        """
        path = os.path.join(self.path, "imgs", "test")
        data = list(map(self.__image_processing,
                        [os.path.join(path, i) 
                         for i in os.listdir(path)[5000*number:5000*(number+1)]
                        ]
                       )
                   )
        df = pd.DataFrame(data, columns=["X"+str(i) 
                                         for i in range(self.img_flatten)])
        return df
    
    def __check(self, path, size, cut):
        """
        check folder list & image size
        """
        if "imgs" not in os.listdir(self.path):
            try:
                print("Unpacking...")
                shutil.unpack_archive("imgs.zip", "imgs", "zip")
                print("Unpack Complete!")
            except FileNotFoundError:
                print("You can download 'imgs.zip' at \n\
                    https://www.kaggle.com/c/state-farm-distracted-driver-detection/data")
                raise

        if "data" not in os.listdir(self.path):
            # make the 'data' directory for the pickled dataframes
            os.mkdir("data")

        # override the defaults if the caller passed path / size / cut
        if path:
            self.path = path
        if size:
            self.img_size = size
        if cut:
            self.img_cut = cut
            # recompute the flattened feature count for the new crop
            self.img_flatten = (self.img_cut[3]-self.img_cut[2]) * (self.img_cut[1]-self.img_cut[0])

        return self.img_flatten
    
    @slack
    def make_train_data(self, save=True, path="", size=[], cut=[]):
        """
        All data is saved under path/data/; the default path is the current directory.
        If you want to change the save directory, change the self.path value.
        save: pickle the train dataframe, default True
        path: default path is the current directory
        size: resize image, default = [120, 90]
        cut: cut image, default = [10, 110, 10, 80]
        """
        
        size = self.__check(path, size, cut)
        
        answer = input("If pickle file in folder, do you want to use this file? (Y/n)")
        train_df = pd.DataFrame(columns = ["X"+str(i) for i in range(size)] + ["Y"])
        print("Start working...")
        for i in range(10):
            file_name = "train_class_" + str(i) + ".pickle"
            if (file_name in os.listdir(os.path.join(self.path, "data"))) and (answer.upper() == "Y"):
                train_class_df = pd.read_pickle(os.path.join(self.path, "data", file_name))
            else:
                train_class_df = self.__make_train_dataframe(i)
                train_class_df.to_pickle(os.path.join(self.path, "data", file_name))
            train_df = pd.concat([train_df, train_class_df])
            print("{0} / {1} Complete".format(i+1, 10))
        
        train_df.reset_index(drop=True, inplace=True)
        if save:
            train_df.to_pickle(os.path.join(self.path, "data", "train_df.pickle"))
        print("All Complete!")
        return train_df
    
    def load_train_data(self):
        """
        load train dataframe
        """
        return pd.read_pickle(os.path.join(self.path, "data", "train_df.pickle"))
    
    @slack
    def make_test_data(self, save=True, path="", size=[], cut=[]):
        """
        All data save as path/data/, default path is current directory
        If you want to change save directory, change self.path value
        
        save: test_dataframe to pickle, default True
        path: defalut path is current directory
        size: resize image, default = [120,90]
        cut: cut img, default = [10,110,10,80]
        
        """
        
        size = self.__check(path, size, cut)
        
        answer = input("If pickle file in folder, do you want to use this file? (Y/n)")
        test_df = pd.DataFrame(columns=["X"+str(i) for i in range(size)])
        for i in range(16):
            file_name = "test_" + str(i) + ".pickle"
            if (file_name in os.listdir(os.path.join(self.path, "data"))) and (answer.upper() == "Y"):
                test_small_df = pd.read_pickle(os.path.join(self.path, "data", file_name))
            else:
                test_small_df = self.__make_test_dataframe(i)
                test_small_df.to_pickle(os.path.join(self.path, "data", file_name))
            test_df = pd.concat([test_df, test_small_df])
            print("{0} / {1} Complete".format(i+1, 16))
        
        test_df.reset_index(drop=True, inplace=True)
        if save:
            test_df.to_pickle(os.path.join(self.path, "data", "test_df.pickle"))
        print("All Complete!")
        
        return test_df
            

    def load_test_data(self):
        """
        load test dataframe
        """
        return pd.read_pickle(os.path.join(self.path, "data", "test_df.pickle"))

In [30]:
process = Image_processing()

In [26]:
process.make_train_data()


If pickle file in folder, do you want to use this file? (Y/n)Y
Start working...
1 / 10 Complete
2 / 10 Complete
3 / 10 Complete
4 / 10 Complete
5 / 10 Complete
6 / 10 Complete
7 / 10 Complete
8 / 10 Complete
9 / 10 Complete
10 / 10 Complete
All Complete!
Out[26]:
X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 ... X6991 X6992 X6993 X6994 X6995 X6996 X6997 X6998 X6999 Y
0 26.0 4.0 17.0 2.0 2.0 1.0 3.0 3.0 5.0 6.0 ... 57.0 58.0 59.0 56.0 55.0 54.0 54.0 54.0 56.0 0.0
1 43.0 44.0 44.0 44.0 44.0 43.0 40.0 41.0 41.0 42.0 ... 76.0 76.0 75.0 77.0 76.0 76.0 75.0 75.0 76.0 0.0
2 49.0 49.0 49.0 49.0 49.0 49.0 48.0 48.0 48.0 48.0 ... 84.0 84.0 84.0 84.0 84.0 84.0 84.0 84.0 84.0 0.0
3 25.0 25.0 25.0 26.0 27.0 28.0 28.0 28.0 28.0 28.0 ... 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0 0.0
4 17.0 14.0 12.0 12.0 15.0 18.0 4.0 7.0 10.0 14.0 ... 28.0 33.0 37.0 31.0 33.0 35.0 36.0 35.0 35.0 0.0
5 36.0 36.0 36.0 36.0 36.0 36.0 37.0 37.0 37.0 37.0 ... 61.0 62.0 63.0 64.0 64.0 65.0 66.0 65.0 64.0 0.0
6 62.0 62.0 63.0 63.0 63.0 64.0 64.0 64.0 64.0 65.0 ... 126.0 130.0 135.0 128.0 129.0 130.0 130.0 136.0 139.0 0.0
7 18.0 18.0 18.0 18.0 18.0 18.0 17.0 18.0 18.0 19.0 ... 37.0 34.0 30.0 29.0 32.0 35.0 36.0 36.0 35.0 0.0
8 199.0 176.0 145.0 112.0 84.0 64.0 50.0 32.0 14.0 8.0 ... 74.0 74.0 73.0 68.0 64.0 57.0 51.0 48.0 46.0 0.0
9 45.0 45.0 45.0 45.0 44.0 44.0 46.0 45.0 45.0 44.0 ... 79.0 80.0 81.0 76.0 76.0 76.0 76.0 76.0 76.0 0.0
10 38.0 40.0 40.0 39.0 37.0 36.0 38.0 38.0 38.0 38.0 ... 66.0 66.0 66.0 68.0 68.0 68.0 67.0 67.0 66.0 0.0
11 191.0 243.0 248.0 241.0 243.0 249.0 249.0 213.0 145.0 76.0 ... 40.0 40.0 40.0 40.0 42.0 45.0 46.0 46.0 46.0 0.0
12 45.0 45.0 45.0 45.0 45.0 45.0 46.0 46.0 46.0 46.0 ... 63.0 65.0 66.0 65.0 65.0 66.0 66.0 66.0 67.0 0.0
13 27.0 28.0 28.0 28.0 29.0 29.0 29.0 29.0 28.0 28.0 ... 5.0 5.0 5.0 5.0 5.0 5.0 6.0 6.0 7.0 0.0
14 239.0 241.0 244.0 246.0 246.0 246.0 246.0 246.0 245.0 243.0 ... 16.0 14.0 12.0 27.0 31.0 33.0 31.0 29.0 34.0 0.0
15 43.0 43.0 41.0 42.0 44.0 46.0 45.0 45.0 45.0 45.0 ... 106.0 106.0 106.0 106.0 110.0 111.0 109.0 110.0 112.0 0.0
16 21.0 22.0 23.0 23.0 23.0 22.0 21.0 22.0 22.0 21.0 ... 50.0 47.0 44.0 48.0 48.0 48.0 47.0 45.0 42.0 0.0
17 16.0 16.0 16.0 16.0 15.0 15.0 16.0 17.0 17.0 17.0 ... 60.0 61.0 61.0 64.0 57.0 57.0 64.0 66.0 60.0 0.0
18 33.0 34.0 34.0 35.0 35.0 35.0 36.0 36.0 36.0 36.0 ... 89.0 90.0 81.0 34.0 22.0 13.0 14.0 22.0 27.0 0.0
19 27.0 27.0 27.0 27.0 28.0 29.0 29.0 28.0 28.0 28.0 ... 73.0 72.0 72.0 20.0 2.0 7.0 5.0 2.0 6.0 0.0
20 91.0 104.0 119.0 141.0 168.0 187.0 231.0 235.0 239.0 242.0 ... 15.0 27.0 45.0 35.0 33.0 32.0 33.0 35.0 37.0 0.0
21 110.0 206.0 253.0 249.0 252.0 234.0 240.0 183.0 102.0 36.0 ... 46.0 47.0 47.0 52.0 50.0 48.0 52.0 56.0 56.0 0.0
22 65.0 65.0 66.0 66.0 66.0 67.0 66.0 66.0 66.0 67.0 ... 144.0 144.0 145.0 148.0 146.0 142.0 142.0 148.0 150.0 0.0
23 33.0 33.0 33.0 33.0 32.0 32.0 32.0 32.0 32.0 32.0 ... 79.0 80.0 83.0 86.0 85.0 82.0 81.0 83.0 81.0 0.0
24 41.0 41.0 41.0 41.0 41.0 41.0 43.0 43.0 43.0 43.0 ... 67.0 67.0 68.0 68.0 68.0 68.0 68.0 68.0 68.0 0.0
25 28.0 28.0 28.0 28.0 28.0 28.0 28.0 28.0 28.0 28.0 ... 4.0 4.0 5.0 4.0 4.0 6.0 7.0 8.0 8.0 0.0
26 17.0 18.0 18.0 17.0 17.0 16.0 17.0 17.0 17.0 17.0 ... 61.0 62.0 62.0 71.0 66.0 57.0 44.0 24.0 5.0 0.0
27 42.0 42.0 42.0 42.0 42.0 42.0 42.0 42.0 42.0 42.0 ... 99.0 99.0 98.0 100.0 98.0 98.0 99.0 92.0 74.0 0.0
28 7.0 7.0 14.0 4.0 4.0 5.0 9.0 9.0 7.0 6.0 ... 49.0 50.0 51.0 49.0 47.0 46.0 46.0 46.0 46.0 0.0
29 29.0 30.0 31.0 31.0 30.0 30.0 30.0 31.0 31.0 32.0 ... 24.0 18.0 19.0 21.0 22.0 21.0 18.0 13.0 9.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
22394 14.0 14.0 15.0 15.0 15.0 15.0 17.0 17.0 17.0 17.0 ... 61.0 61.0 60.0 63.0 62.0 62.0 63.0 65.0 64.0 9.0
22395 50.0 50.0 50.0 50.0 50.0 50.0 49.0 49.0 49.0 50.0 ... 82.0 80.0 78.0 83.0 82.0 81.0 80.0 81.0 82.0 9.0
22396 22.0 22.0 23.0 23.0 23.0 24.0 24.0 24.0 24.0 25.0 ... 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0 9.0
22397 42.0 42.0 43.0 43.0 43.0 44.0 43.0 43.0 43.0 44.0 ... 76.0 88.0 87.0 86.0 77.0 54.0 30.0 16.0 13.0 9.0
22398 28.0 28.0 28.0 28.0 27.0 27.0 30.0 30.0 30.0 30.0 ... 41.0 40.0 40.0 39.0 39.0 40.0 40.0 40.0 40.0 9.0
22399 77.0 74.0 84.0 109.0 140.0 162.0 189.0 203.0 222.0 237.0 ... 55.0 52.0 47.0 47.0 51.0 66.0 118.0 163.0 123.0 9.0
22400 13.0 14.0 16.0 18.0 18.0 19.0 23.0 17.0 8.0 3.0 ... 29.0 33.0 36.0 38.0 39.0 39.0 39.0 41.0 44.0 9.0
22401 35.0 35.0 35.0 35.0 35.0 35.0 37.0 37.0 37.0 37.0 ... 39.0 28.0 21.0 27.0 25.0 23.0 22.0 19.0 16.0 9.0
22402 22.0 23.0 23.0 22.0 22.0 21.0 19.0 20.0 21.0 22.0 ... 46.0 46.0 45.0 47.0 46.0 45.0 46.0 48.0 48.0 9.0
22403 35.0 36.0 37.0 37.0 36.0 36.0 35.0 35.0 35.0 36.0 ... 10.0 9.0 10.0 8.0 7.0 13.0 28.0 42.0 49.0 9.0
22404 59.0 59.0 59.0 59.0 59.0 59.0 60.0 60.0 60.0 60.0 ... 117.0 118.0 118.0 120.0 120.0 119.0 119.0 122.0 122.0 9.0
22405 240.0 251.0 220.0 249.0 237.0 237.0 151.0 76.0 12.0 8.0 ... 52.0 52.0 52.0 54.0 53.0 52.0 51.0 49.0 48.0 9.0
22406 23.0 23.0 21.0 19.0 20.0 20.0 24.0 24.0 23.0 23.0 ... 64.0 64.0 64.0 63.0 64.0 65.0 69.0 74.0 74.0 9.0
22407 53.0 53.0 53.0 53.0 53.0 53.0 53.0 53.0 53.0 53.0 ... 80.0 80.0 80.0 79.0 79.0 78.0 77.0 78.0 79.0 9.0
22408 13.0 15.0 17.0 18.0 19.0 19.0 25.0 18.0 11.0 7.0 ... 31.0 34.0 36.0 37.0 38.0 40.0 41.0 42.0 43.0 9.0
22409 232.0 247.0 217.0 180.0 144.0 44.0 14.0 9.0 10.0 14.0 ... 58.0 58.0 58.0 58.0 58.0 58.0 57.0 57.0 56.0 9.0
22410 39.0 39.0 40.0 40.0 40.0 41.0 43.0 43.0 43.0 43.0 ... 88.0 88.0 88.0 91.0 90.0 85.0 82.0 86.0 91.0 9.0
22411 29.0 30.0 29.0 29.0 29.0 29.0 27.0 28.0 29.0 31.0 ... 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0 5.0 9.0
22412 19.0 17.0 108.0 216.0 251.0 249.0 244.0 246.0 226.0 166.0 ... 66.0 66.0 66.0 66.0 67.0 67.0 68.0 68.0 67.0 9.0
22413 37.0 37.0 38.0 39.0 39.0 39.0 37.0 37.0 37.0 38.0 ... 81.0 81.0 82.0 84.0 78.0 77.0 85.0 87.0 78.0 9.0
22414 48.0 48.0 48.0 48.0 48.0 48.0 47.0 47.0 46.0 46.0 ... 115.0 115.0 116.0 119.0 113.0 112.0 116.0 118.0 112.0 9.0
22415 18.0 18.0 17.0 16.0 15.0 14.0 16.0 16.0 16.0 16.0 ... 31.0 34.0 38.0 34.0 35.0 36.0 38.0 38.0 38.0 9.0
22416 46.0 24.0 16.0 14.0 6.0 9.0 2.0 4.0 13.0 19.0 ... 137.0 52.0 32.0 54.0 60.0 62.0 59.0 57.0 59.0 9.0
22417 59.0 58.0 58.0 59.0 60.0 60.0 59.0 59.0 59.0 59.0 ... 130.0 131.0 132.0 128.0 135.0 134.0 128.0 132.0 139.0 9.0
22418 39.0 40.0 40.0 41.0 41.0 41.0 40.0 40.0 40.0 41.0 ... 69.0 69.0 69.0 67.0 67.0 67.0 67.0 67.0 67.0 9.0
22419 253.0 251.0 230.0 246.0 243.0 215.0 145.0 68.0 11.0 7.0 ... 85.0 84.0 82.0 84.0 79.0 83.0 88.0 74.0 52.0 9.0
22420 61.0 61.0 61.0 61.0 61.0 61.0 60.0 61.0 61.0 61.0 ... 112.0 112.0 114.0 111.0 119.0 120.0 110.0 113.0 120.0 9.0
22421 47.0 47.0 47.0 47.0 47.0 47.0 48.0 48.0 48.0 48.0 ... 106.0 106.0 105.0 108.0 109.0 108.0 107.0 107.0 111.0 9.0
22422 32.0 32.0 32.0 33.0 33.0 33.0 33.0 35.0 37.0 37.0 ... 56.0 57.0 58.0 63.0 71.0 79.0 79.0 73.0 69.0 9.0
22423 23.0 67.0 171.0 248.0 252.0 243.0 230.0 238.0 208.0 126.0 ... 50.0 50.0 50.0 52.0 53.0 53.0 53.0 53.0 53.0 9.0

22424 rows × 7001 columns
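
The returned frame has 7000 pixel columns (X0 ... X6999) plus the class label Y, so it splits directly into a feature matrix and a target vector. A minimal sketch, assuming the return value above was assigned to train_df (or reloaded later via process.load_train_data()):

X = train_df.drop("Y", axis=1).values   # shape (22424, 7000)
y = train_df["Y"].values                # driver-distraction classes 0-9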


In [31]:
process.make_test_data()


If pickle file in folder, do you want to use this file? (Y/n)Y
1 / 8 Complete
2 / 8 Complete
3 / 8 Complete
4 / 8 Complete
5 / 8 Complete
6 / 8 Complete
7 / 8 Complete
8 / 8 Complete
---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input-31-5ab9cce0cf0c> in <module>()
----> 1 process.make_test_data()

<ipython-input-24-d8954facdf01> in make_test_data(self, save, path, size, cut)
    155         test_df.reset_index(drop=True, inplace=True)
    156         if save:
--> 157             test_df.to_pickle(os.path.join(self.path, "data", "test_df.pickle"))
    158         print("All Complete!")
    159 

C:\Users\kms\Anaconda3\lib\site-packages\pandas\core\generic.py in to_pickle(self, path)
   1175         """
   1176         from pandas.io.pickle import to_pickle
-> 1177         return to_pickle(self, path)
   1178 
   1179     def to_clipboard(self, excel=None, sep=None, **kwargs):

C:\Users\kms\Anaconda3\lib\site-packages\pandas\io\pickle.py in to_pickle(obj, path)
     18     """
     19     with open(path, 'wb') as f:
---> 20         pkl.dump(obj, f, protocol=pkl.HIGHEST_PROTOCOL)
     21 
     22 

MemoryError: 
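
The per-chunk pickles (data/test_0.pickle, data/test_1.pickle, ...) were written before the final to_pickle call failed, so the MemoryError only affects the combined test_df.pickle. One hedged workaround is to skip the combined save (make_test_data(save=False)) and work through the cached chunks one at a time instead of concatenating the whole test set in memory. A minimal sketch under that assumption:

process = Image_processing()
for i in range(16):
    chunk_path = os.path.join(process.path, "data", "test_" + str(i) + ".pickle")
    if not os.path.exists(chunk_path):
        break
    chunk = pd.read_pickle(chunk_path)
    # ... run the model on this chunk and keep only the predictions ...
    del chunk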

In [ ]: