In [1]:
import sys, os
sys.path.insert(0, os.path.abspath('..'))

import tools.CONSTANTS as c
import tools.IMAGEUTILS as im
import tools.UTILS as u
import pickle
from time import sleep

In [2]:
class Deinterlacer():
    
    def __init__(self, deint_pkl_file='done.pkl', base_dir=c.BASE_DIR):
        """
        all images must be deinterlaced before being used in the neural
        network. This slightly improves accuracy and vastly reduces file
        size. Note that the deinterlacer will never deinterlace a unique
        filename twice, a user must manually delete the deint_pkl_file.
        
        
        :param deint_pkl_file: the filename of the pickled set of all
                               deinterlaced files
        :type  deint_pkl_file: string
        :param base_dir: the base directory to walk
        :type  base_dir: string
        """
        self.deint_pkl_file = deint_pkl_file
        self.deinterlaced_files = self.get_deinterlaced_files()
        self.sleep_time = 30 # the number of seconds to wait after
                             # deinterlacing all files
        self.base_dir = base_dir
        
        
    def get_deinterlaced_files(self):
        """
        unpickles the set of deinterlaced files if it exists. Otherwise, 
        create the set from scratch
        
        :returns: the set of deinterlaced files
        :rtype  : set
        """
        try:
            with open(self.deint_pkl_file, 'rb') as f:
                deinterlaced_files = pickle.load(f)
        except:
            print("Exception - will create new deinterlaced files set")
            deinterlaced_files = set()
        return deinterlaced_files
    
    
    def pickle_deinterlaced_files(self):
        """ pickle the process_files set """
        with open(self.deint_pkl_file, 'wb') as f:
            pickle.dump(self.deinterlaced_files, f)
    
    
    def de_interlace(self, img):
        """
        de-interlaces an image by taking every other row and every other colum

        :param img: the image data
        :type  img: ndarray
        
        :returns de_interlaced_image: the de-interlaced image
        :rtype   de_interlaced_image: ndarray
        """
        de_interlaced_img = img[::2, 1::2]
        return de_interlaced_img
    
    
    def process_file(self, filepath):
        """
        reads in filepath, de-interlaces, and writes new image
        
        :param filepath: the filename of the image to be operated on
        :type  filepath: string
        :returns: True if the file has already been deinterlaced
        """
        img = im.imread(filepath)
        if img.shape != (1080, 1920, 3):
            return True
        de_interlaced_img = self.de_interlace(img)
        im.imsave(filepath, de_interlaced_img)
        
        
    def file_standards(self, file, filepath):
        """
        checks to make sure that the file should be processed. In this case:
        
        The file must have the IMG extension
        The file must not have already been processed
        The file must be larger then 2MB. Interlaced images are around 10MB,
        while deinterlaced images are are ound 850KB.
        
        :param file: the name of the file
        :type  file: string
        :param filepath: the full filepath of the file
        :type  filepath: string
        
        :returns: whether or not the file/filepath should be included
        :rtype  : bool
        """
        is_xml   = bool(file[-4:] == c.IMG_EXT)
        not_done = bool(filepath not in self.deinterlaced_files)
        is_large = bool(os.path.getsize(filepath) > 2*(10**6))
        return (all([is_xml, not_done, is_large]))
    
    
    def process_files(self):
        """ Walk through all files and try to deinterlace every image """
        while True:
            #iterate through all files
            for root, dirs, files in os.walk(self.base_dir):
                for file in sorted(files):
                    filepath = os.path.join(root, file)

                    # check that it is an image file created by ffmpeg and not already processed
                    if self.file_standards(file, filepath):
                        self.process_file(filepath)
                        self.deinterlaced_files.add(filepath)
                        print(filepath + ": done")
                        
                self.pickle_deinterlaced_files()

            sleep(self.sleep_time)

In [3]:
deinterlacer = Deinterlacer()
deinterlacer.process_files()


/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-41-00.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-41-05.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-41-10.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-41-15.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-41-20.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-41-25.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-41-30.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-41-35.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-41-40.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-41-45.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-41-50.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-41-55.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-42-00.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-42-05.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-42-10.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-42-15.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-42-20.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-42-25.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-42-30.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-42-35.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-42-40.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-42-45.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-42-50.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-42-55.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-43-00.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-43-05.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-43-10.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-43-15.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-43-20.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-43-25.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-43-30.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-43-35.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-43-40.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-43-45.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-43-50.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-43-55.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-44-00.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-44-05.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-44-10.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-44-15.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-44-20.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-44-25.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-44-30.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-44-35.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-44-40.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-44-45.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-44-50.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-44-55.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-45-00.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-45-05.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-45-10.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-45-15.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-45-20.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-45-25.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-45-30.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-45-35.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-45-40.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-45-45.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-45-50.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-45-55.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-46-00.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-46-05.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-46-10.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-46-15.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-46-20.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-46-25.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-46-30.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-46-35.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-46-40.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-46-45.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-46-50.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-46-55.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-47-00.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-47-05.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-47-10.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-47-15.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-47-20.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-47-25.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-47-30.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-47-35.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-47-40.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-47-45.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-47-50.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-47-55.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-48-00.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-48-05.png: done
/Volumes/nyee/datasets/frcnn_data/D0772_10HD/imgs/D0772_10HD_00-48-10.png: done
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-3-480a82627401> in <module>()
      1 deinterlacer = Deinterlacer()
----> 2 deinterlacer.process_files()

<ipython-input-2-b27f82d0a48b> in process_files(self)
    100 
    101                     # check that it is an image file created by ffmpeg and not already processed
--> 102                     if self.file_standards(file, filepath):
    103                         self.process_file(filepath)
    104                         self.deinterlaced_files.add(filepath)

<ipython-input-2-b27f82d0a48b> in file_standards(self, file, filepath)
     84         is_xml   = bool(file[-4:] == c.IMG_EXT)
     85         not_done = bool(filepath not in self.deinterlaced_files)
---> 86         large    = bool(os.path.getsize(filepath) > 2*(10**6))
     87         return (all([is_xml, not_done, large]))
     88 

~/anaconda3/envs/Video-Processing/lib/python3.6/genericpath.py in getsize(filename)
     48 def getsize(filename):
     49     """Return the size of a file, reported by os.stat()."""
---> 50     return os.stat(filename).st_size
     51 
     52 

FileNotFoundError: [Errno 2] No such file or directory: '/Volumes/nyee/datasets/frcnn_data/D0772_10HD/.DS_Store'

In [ ]: