In [ ]:
#os and ntpath methods to deal with #directory and file names
#path #os #ntpath #glob #dir #filename

import ntpath
import os

path = r'C:\Program Filez (Portable)\Console2\console.chm'
# Note: this directory is spelled 'Filez' with a z not because it holds pirated
# software, but so that it sorts third, after Program Files and Program Files (x86).

# os.path applies the path rules of the platform running this cell, so the
# Windows-style results below are only meaningful when run on Windows.
print('os:')
dirname = os.path.dirname(path)
basename = os.path.basename(path)
print(dirname, basename, os.path.join(dirname, basename))
print('exists:', os.path.exists(dirname), os.path.exists(basename), os.path.exists(path))
print('isdir:', os.path.isdir(dirname), os.path.isdir(basename), os.path.isdir(path))
print('isfile:', os.path.isfile(dirname), os.path.isfile(basename), os.path.isfile(path))
print('split:', os.path.split(path))
print('splitext:', os.path.splitext(path), os.path.splitext(dirname), os.path.splitext(basename))
# os.path.splitunc exists only on Windows and was removed in Python 3.7.
# Fall back to splitdrive (which handles UNC prefixes in current Pythons) so the
# cell does not die with AttributeError where splitunc is unavailable.
split_unc = getattr(os.path, 'splitunc', os.path.splitdrive)
print('splitunc:', split_unc(path), split_unc(dirname), split_unc(basename))

print('----------')
# ntpath always applies Windows path rules, whatever the host platform.
print('ntpath:', ntpath.dirname(path), ntpath.basename(path),)
# qv http://pydoc.org/2.5.1/ntpath.html

In [ ]:
# ASCII-art banner for this cell (reads "split csv at field change").
# It must be a raw string: in a normal string the backslash that ends the third
# art row would swallow the newline, and the doubled backslash in the fourth
# row would collapse to a single one, corrupting the picture.
x = r"""
           _ _ _                            _      __ _      _     _        _                            
 ___ _ __ | (_) |_    ___ _____   __   __ _| |_   / _(_) ___| | __| |   ___| |__   __ _ _ __   __ _  ___ 
/ __| '_ \| | | __|  / __/ __\ \ / /  / _` | __| | |_| |/ _ \ |/ _` |  / __| '_ \ / _` | '_ \ / _` |/ _ \
\__ \ |_) | | | |_  | (__\__ \\ V /  | (_| | |_  |  _| |  __/ | (_| | | (__| | | | (_| | | | | (_| |  __/
|___/ .__/|_|_|\__|  \___|___/ \_/    \__,_|\__| |_| |_|\___|_|\__,_|  \___|_| |_|\__,_|_| |_|\__, |\___|
    |_|                                                                                       |___/      

    """

import os

def split_file(filepath, lines=None):
    """Split a CSV file into numbered pieces of roughly ``lines`` lines each.

    A piece is never cut in the middle of a run of rows that share the same
    first CSV field: once a line index that is a multiple of ``lines`` is
    reached, the current piece keeps growing until the first field changes,
    and only then is the next piece started. Presumes there is always at
    least one field change between each block of lines.

    The pieces are written next to the input as ``<name>_<n><ext>`` with
    ``n`` starting at 0. The first line of the input is repeated at the top of
    every piece, and the second line of the input (which is not data) is
    dropped.

    Parameters:
        filepath: path of the CSV file to split.
        lines: approximate number of input lines per piece. When omitted, a
            notebook-level variable named ``numlines`` is used if one exists.

    Raises:
        ValueError: if ``lines`` is omitted and no ``numlines`` is defined, or
            if ``lines`` is smaller than 1.
    """
    if lines is None:
        # The default used to be evaluated when the function was defined,
        # which raised NameError unless `numlines` already existed. Resolving
        # it here lets the definition succeed and explicit calls always work.
        try:
            lines = numlines
        except NameError:
            raise ValueError("pass 'lines' explicitly or define 'numlines' first")
    if lines < 1:
        raise ValueError("lines must be a positive integer")

    counter = 0
    path, filename = os.path.split(filepath)
    # filename.split('.') would not work for filenames with more than one .
    basename, ext = os.path.splitext(filename)

    def _piece_path(index):
        # Output path for piece number `index`, in the input file's directory.
        return os.path.join(path, '{}_{}{}'.format(basename, index, ext))

    with open(filepath, 'r') as f_in:
        # Open the first piece before entering try/finally, so the cleanup
        # never touches an output handle that was not created.
        f_out = open(_piece_path(counter), 'w')
        try:
            status = 'incomplete'
            firstline = None
            field = None
            for i, line in enumerate(f_in):
                if i == 0:
                    # remembered so it can be repeated at the top of each piece
                    firstline = line
                if status == 'waiting for field change':
                    if line.strip().split(',')[0] != field:
                        # first field changed: finish this piece, start the next
                        f_out.close()
                        counter += 1
                        print(counter)
                        f_out = open(_piece_path(counter), 'w')
                        status = 'incomplete'
                        f_out.write(firstline)
                if i > 5 and i % lines == 0:
                    # size threshold reached: split at the next field change
                    status = 'waiting for field change'
                    field = line.strip().split(',')[0]
                if i != 1:  # remove second line of file, which is not data
                    f_out.write(line)
        finally:
            # close the last output file
            f_out.close()

# Run the splitter on the hard-coded CSV, asking for a new piece roughly every 567000 lines.
split_file("C:/Users/David/Documents/IPython Notebooks/elancematthew/wxdatarawclean.csv", 567000)

In [ ]:
#open random folder from list

import os
import random
import webbrowser

def w2p(path):
    """Windows to posix: return ``path`` with every backslash turned into a forward slash."""
    pieces = path.split("\\")
    return "/".join(pieces)

# If a leaf folder is chosen, that folder will open.
# If a node folder is chosen, one of its subfolders will open.
# You can weight the leaf and node folders, but not the node's subfolders.

# Raw strings are required: in a plain literal "\a" is a BEL and "\b" a
# backspace (silently corrupting "C:\api" and "...\Sync\books"), and "\U" is a
# syntax error on Python 3.
leaf_folders = {r"C:\api": 10,
    r"C:\_FTP": 10,
    r"C:\Program Filez (Portable)": 5,
    r"C:\Users\David\Google Drive": 5,
    r"C:\Users\David\Pictures": 5,
    r"C:\Users\David\Documents": 5,
    r"C:\Users\David\Documents\Projects": 10,
    r"C:\Users\David\Documents\IPython Notebooks": 10,
    r"C:\Users\David\Documents\IPython Data": 5,
    r"C:\Users\David\Documents\Dropbox": 10,
    r"C:\Users\David\Documents\Dropbox\DT": 5,
    r"C:\Users\David\Documents\Dropbox\Sync": 20,
    r"C:\Users\David\Documents\Dropbox\Sync\books": 10}

node_folders = {r"C:\api": 10,
    r"C:\Users\David\Downloads": 10,
    r"C:\Users\David\Google Drive": 10,
    r"C:\Users\David\Pictures": 10,
    r"C:\Users\David\Documents\Projects": 40,
    r"C:\Users\David\Documents\IPython Notebooks": 30,
    r"C:\Users\David\Documents\IPython Data": 5,
    r"C:\Users\David\Documents\Dropbox": 20,
    r"C:\Users\David\Documents\Dropbox\Sync": 30}

# Weighted pool: every folder appears once per unit of weight, leaf entries
# first and node entries after them.
all_folders = []
for key, weight in leaf_folders.items():
    all_folders.extend([key] * weight)
leaf_slots = len(all_folders)
for key, weight in node_folders.items():
    all_folders.extend([key] * weight)

# A uniformly random slot gives the same weighting as shuffling and taking the
# first entry; a slot at or past leaf_slots came from node_folders.
slot = random.randrange(len(all_folders))
folder_to_open = all_folders[slot]
if slot >= leaf_slots:
    # Node folder: open one of its immediate subfolders instead, falling back
    # to the node itself when it has none.
    subfolders = [os.path.join(folder_to_open, name)
                  for name in os.listdir(folder_to_open)
                  if os.path.isdir(os.path.join(folder_to_open, name))]
    if subfolders:
        folder_to_open = random.choice(subfolders)
os.startfile(folder_to_open)  # Windows-only API

# Open one random URL from the list file, then write the remaining URLs back
# so that each URL is visited only once.
url_file = "C:/Program Filez (Portable)/urls2gothru.txt"
with open(url_file, "r") as f:
    # blank lines are dropped so an empty "URL" is never handed to the browser
    urls = [line.rstrip() for line in f if line.strip()]
if urls:  # an empty list file used to raise IndexError
    random.shuffle(urls)
    webbrowser.open_new(urls[0])
    with open(url_file, "w") as f:
        for url in urls[1:]:
            f.write(url + '\n')

In [ ]:
#rename #random #files by adding prefix

# Folder whose .mp3 files get a random numeric prefix (note the trailing slash,
# which the glob pattern below relies on).
path = "H:/Music/"

import os
import glob
from random import shuffle

pathlist = glob.glob(path + '*.mp3')

# list() makes the sequence mutable: shuffle() cannot reorder a Python 3 range.
numbers = list(range(len(pathlist)))
shuffle(numbers)

print(numbers)
print(pathlist)

# Pair each file with its own unique random number and rename it to
# "<number>_<original name>" inside the same directory.
for number, oldname in zip(numbers, pathlist):
    # os.path.join (rather than gluing with '/') stays correct even when the
    # directory part is empty.
    newname = os.path.join(os.path.dirname(oldname),
                           str(number) + '_' + os.path.basename(oldname))
    os.rename(oldname, newname)

In [ ]:
#os #split #text files every n lines
#every file in a directory!

import os
path = "C:/Users/David/to_split"

# Work inside the target directory so the glob pattern and the output names
# below can be relative.
os.chdir(path)

import glob

prefix = "all_"
turnover = 1000000  # number of lines written to each output file

# Skip files that already carry the output prefix, so re-running the cell does
# not split its own earlier output.
listoffiles = [name for name in glob.glob('*.csv') if not name.startswith(prefix)]

for filename in listoffiles:
    # Open the file named by the loop variable (not the literal "filename").
    with open(filename, "rt") as f:
        # The first piece is "<prefix><file>0.csv"; later pieces are
        # "<prefix><file>output<k>.csv" (names kept exactly as originally written).
        fout = open(prefix + filename + "0.csv", "w")
        try:
            # Iterate lazily rather than readlines(), so a huge file is never
            # held in memory; i is the 1-based count of lines written so far.
            for i, line in enumerate(f, 1):
                fout.write(line)
                if i % turnover == 0:
                    fout.close()
                    # Text mode to match the first piece; "wb" rejects str on
                    # Python 3. Floor division keeps the piece number an integer.
                    fout = open(prefix + filename + "output%d.csv" % (i // turnover), "w")
        finally:
            # always release the current output file, even after an error
            fout.close()

In [ ]:
# os: thin out a folder by keeping every n-th file and deleting the rest

import glob
import os

multiple = 2  # keep every `multiple`-th file (by sorted name); delete the others

extension = ".jpg"

# Raw string so the backslashes are taken literally instead of being parsed as
# (invalid) escape sequences.
origpath = r"C:/Users\David\Downloads\Contact opening 4500 frames (8-16-2014 7-39-08 AM)"
# Convert to forward slashes inline; the previous call to win2posix() referred
# to a helper that is not defined under that name.
thepath = origpath.replace("\\", "/")
os.chdir(thepath)

if thepath[-1] != "/":
    thepath += "/"

# glob returns matches in arbitrary order; sort them so that "every n-th file"
# is determined by file name and is the same on every run.
filelist = sorted(glob.glob(thepath + "*" + extension))

print("Original count: %d files." % (len(filelist)))

for counter, filename in enumerate(filelist):
    if counter % multiple != 0:
        os.remove(filename)

newfilelist = glob.glob(thepath + "*" + extension)

print("     New count: %d files." % (len(newfilelist)))

In [ ]:
#traverse #file #tree

def w2p(path):
    """Return ``path`` with Windows backslash separators replaced by forward slashes."""
    segments = path.split("\\")
    return "/".join(segments)
import os
# Walk the package tree and, for every .py file, build its text with a UTF-8
# coding line prepended. The write-back is left commented out, so as written
# this loop only reads the files.
# (raw string: the backslashes are path separators, not escape sequences)
for root, dirs, files in os.walk(r"C:\Python_packages\lingpy-2"):
    for name in files:
        if name.endswith('.py'):
            source_path = os.path.join(root, name)
            # `with` closes the handle promptly; a bare open().read() leaves
            # one file open per module until garbage collection.
            with open(source_path, 'r') as source:
                contents = source.read()
            contents = "# -*- coding: utf-8 -*-\n" + contents
            #with open(source_path, 'w') as target: target.write(contents) # rewrites file