# In[ ]:
#os and ntpath methods to deal with #directory and file names
#path #os #ntpath #glob #dir #filename
import ntpath
import os
# Demo: compare what os.path and ntpath make of the same Windows path.
path = r'C:\Program Filez (Portable)\Console2\console.chm'
# Note: I don't call this directory 'Filez' with a z because it's pirated,
# but so it will sort third after Program Files and Program Files (x86).

# os.path follows the conventions of the platform the interpreter runs on,
# so the results below differ between Windows and POSIX hosts.
print('os:')
dirname = os.path.dirname(path)
basename = os.path.basename(path)
print(dirname, basename, os.path.join(dirname, basename))
print('exists:', os.path.exists(dirname), os.path.exists(basename), os.path.exists(path))
print('isdir:', os.path.isdir(dirname), os.path.isdir(basename), os.path.isdir(path))
print('isfile:', os.path.isfile(dirname), os.path.isfile(basename), os.path.isfile(path))
print('split:', os.path.split(path))
print('splitext:', os.path.splitext(path), os.path.splitext(dirname), os.path.splitext(basename))
# os.path.splitunc was Windows-only, deprecated in Python 3.1 and removed in
# 3.7; splitdrive is its documented replacement and exists on every platform.
print('splitdrive:', os.path.splitdrive(path), os.path.splitdrive(dirname), os.path.splitdrive(basename))
print('----------')
# ntpath applies Windows path rules regardless of the host platform.
print('ntpath:', ntpath.dirname(path), ntpath.basename(path))
# qv http://pydoc.org/2.5.1/ntpath.html
# In[ ]:
# ASCII-art banner stored in a throwaway variable; presumably decorative and
# not read by any code -- TODO confirm before removing.
# NOTE(review): this is a plain (non-raw) triple-quoted string, so the
# backslashes inside it go through normal escape processing.
x = """
_ _ _ _ __ _ _ _ _
___ _ __ | (_) |_ ___ _____ __ __ _| |_ / _(_) ___| | __| | ___| |__ __ _ _ __ __ _ ___
/ __| '_ \| | | __| / __/ __\ \ / / / _` | __| | |_| |/ _ \ |/ _` | / __| '_ \ / _` | '_ \ / _` |/ _ \
\__ \ |_) | | | |_ | (__\__ \\ V / | (_| | |_ | _| | __/ | (_| | | (__| | | | (_| | | | | (_| | __/
|___/ .__/|_|_|\__| \___|___/ \_/ \__,_|\__| |_| |_|\___|_|\__,_| \___|_| |_|\__,_|_| |_|\__, |\___|
|_| |___/
"""
import os
def split_file(filepath, lines=None):
    """Split a CSV file into numbered chunks of roughly ``lines`` lines each.

    A chunk is not cut exactly at the line count: once the threshold is
    reached, the function keeps writing to the current chunk until the first
    comma-separated field changes, so rows sharing a first field stay
    together. Presumes there is always at least one field change between
    each block of lines.

    Chunks are written next to the input file as ``<name>_<n><ext>`` with
    ``n`` starting at 0. Every chunk begins with the input's first line; the
    input's second line, which is not data, is dropped.

    Args:
        filepath: Path of the file to split.
        lines: Target number of lines per chunk. When omitted, the
            module-level name ``numlines`` is looked up at call time.

    Raises:
        ValueError: If ``lines`` is not a positive number.
        NameError: If ``lines`` is omitted and ``numlines`` is not defined.
    """
    if lines is None:
        # Resolved here rather than in the signature: a default-argument
        # expression is evaluated once, when the def statement runs, and
        # fails there if numlines does not exist yet.
        lines = numlines
    if lines <= 0:
        raise ValueError('lines must be a positive integer, got {!r}'.format(lines))

    directory, filename = os.path.split(filepath)
    # filename.split('.') would not work for filenames with more than one .
    basename, ext = os.path.splitext(filename)

    def chunk_path(index):
        # Build the path of the index-th output file.
        return os.path.join(directory, '{}_{}{}'.format(basename, index, ext))

    counter = 0
    header = None
    field = None
    waiting_for_field_change = False
    f_out = None
    with open(filepath, 'r') as f_in:
        try:
            # open the first output file
            f_out = open(chunk_path(counter), 'w')
            for i, line in enumerate(f_in):
                if i == 0:
                    header = line
                if waiting_for_field_change and line.strip().split(',')[0] != field:
                    # The first field changed: roll over to the next chunk
                    # and repeat the header at its top.
                    f_out.close()
                    counter += 1
                    print(counter)
                    f_out = open(chunk_path(counter), 'w')
                    waiting_for_field_change = False
                    f_out.write(header)
                if i > 5 and i % lines == 0:
                    # Threshold reached: remember the current first field and
                    # split as soon as a line with a different one shows up.
                    waiting_for_field_change = True
                    field = line.strip().split(',')[0]
                if i != 1:  # remove second line of file, which is not data
                    f_out.write(line)
        finally:
            # close the last output file (None if the first open failed)
            if f_out is not None:
                f_out.close()
# Run the splitter on the weather CSV, passing 567000 as the per-chunk line count.
split_file("C:/Users/David/Documents/IPython Notebooks/elancematthew/wxdatarawclean.csv", 567000)
# In[ ]:
#open random folder from list
import os
import random
import webbrowser
def w2p(path):  # windows to posix
    """Return ``path`` with every backslash replaced by a forward slash."""
    return path.replace("\\", "/")
#if a leaf folder is chosen, that folder will open.
#if a node folder is chosen, one of its subfolders will open.
#you can weight the leaf and node folders, but not the node's subfolders.
# Folder -> weight tables. Raw strings are required here: in a plain literal
# "\a" (as in "C:\api") is the BEL control character, and "\U" (as in
# "C:\Users") is a syntax error on Python 3.
leaf_folders = {
    r"C:\api": 10,
    r"C:\_FTP": 10,
    r"C:\Program Filez (Portable)": 5,
    r"C:\Users\David\Google Drive": 5,
    r"C:\Users\David\Pictures": 5,
    r"C:\Users\David\Documents": 5,
    r"C:\Users\David\Documents\Projects": 10,
    r"C:\Users\David\Documents\IPython Notebooks": 10,
    r"C:\Users\David\Documents\IPython Data": 5,
    r"C:\Users\David\Documents\Dropbox": 10,
    r"C:\Users\David\Documents\Dropbox\DT": 5,
    r"C:\Users\David\Documents\Dropbox\Sync": 20,
    r"C:\Users\David\Documents\Dropbox\Sync\books": 10,
}
node_folders = {
    r"C:\api": 10,
    r"C:\Users\David\Downloads": 10,
    r"C:\Users\David\Google Drive": 10,
    r"C:\Users\David\Pictures": 10,
    r"C:\Users\David\Documents\Projects": 40,
    r"C:\Users\David\Documents\IPython Notebooks": 30,
    r"C:\Users\David\Documents\IPython Data": 5,
    r"C:\Users\David\Documents\Dropbox": 20,
    r"C:\Users\David\Documents\Dropbox\Sync": 30,
}

# Repeat each folder `weight` times so that a uniform random pick honours the
# weights. Leaf entries come first; every entry from index leaf_count onwards
# came from node_folders (a folder may legitimately appear in both tables).
all_folders = []
for folder, weight in leaf_folders.items():
    all_folders.extend([folder] * weight)
leaf_count = len(all_folders)
for folder, weight in node_folders.items():
    all_folders.extend([folder] * weight)

pick = random.randrange(len(all_folders))
target = all_folders[pick]
if pick >= leaf_count:
    # A node folder was drawn: open one of its immediate subfolders instead.
    # Fall back to the node itself when it is unreadable or has none.
    try:
        subfolders = [
            os.path.join(target, name)
            for name in os.listdir(target)
            if os.path.isdir(os.path.join(target, name))
        ]
    except OSError:
        subfolders = []
    if subfolders:
        target = random.choice(subfolders)

# os.startfile is only available on Windows.
os.startfile(target)
# Open one random URL from the to-visit list, then drop it from the list.
url_list_path = "C:/Program Filez (Portable)/urls2gothru.txt"
with open(url_list_path, "r") as f:
    # Skip blank lines so an empty string is never handed to the browser.
    urls = [line.rstrip() for line in f if line.strip()]

if urls:
    # Remove the chosen entry and write the remaining ones back in file order.
    url = urls.pop(random.randrange(len(urls)))
    webbrowser.open_new(url)
    with open(url_list_path, "w") as f:
        f.writelines(remaining + '\n' for remaining in urls)
else:
    # Nothing left to visit; leave the file untouched instead of failing.
    print("No URLs left in " + url_list_path)
# In[ ]:
#rename #random #files by adding prefix
import os
import glob
from random import shuffle

# Prefix every .mp3 directly under `path` with a distinct random number.
path = "H:/Music/"
pathlist = glob.glob(path + '*.mp3')
# list(...) gives shuffle() a mutable sequence; on Python 3 a bare range()
# object cannot be reordered in place.
numbers = list(range(len(pathlist)))
shuffle(numbers)
print(numbers)
print(pathlist)
for number, oldname in zip(numbers, pathlist):
    # Keep the file in its directory; only the base name gains the prefix.
    newname = os.path.join(os.path.dirname(oldname),
                           str(number) + '_' + os.path.basename(oldname))
    os.rename(oldname, newname)
# In[ ]:
#os #split #text files every n lines
#every file in a directory!
import os
# Split every .csv in `path` into pieces of at most `turnover` lines each.
path = "C:/Users/David/to_split"
os.chdir(path)
import glob
# The listing is taken once, up front, so the pieces written below are not
# themselves picked up and split again during this run.
listoffiles = glob.glob('*.csv')
prefix = "all_"
turnover = 1000000
for filename in listoffiles:
    # Open the file named by the loop variable, not a file literally
    # called "filename".
    with open(filename, "rt") as f:
        # NOTE: the first piece ends in "0.csv" while later ones end in
        # "output<k>.csv"; the names are kept as they were so anything that
        # consumes these files keeps working.
        fout = open(prefix + filename + "0.csv", "w")
        try:
            # Stream the input line by line instead of loading it whole;
            # i counts the lines written so far.
            for i, line in enumerate(f, 1):
                fout.write(line)
                if i % turnover == 0:
                    fout.close()
                    # Text mode, matching the first piece; floor division so
                    # the piece number stays an integer.
                    fout = open(prefix + filename + "output%d.csv" % (i // turnover), "w")
        finally:
            # Close the current piece even if reading or writing failed.
            fout.close()
# In[ ]:
# os delete every n files
import glob
import os
# Keep every `multiple`-th file with the given extension and delete the rest.
multiple = 2
extension = ".jpg"
origpath = r"C:/Users\David\Downloads\Contact opening 4500 frames (8-16-2014 7-39-08 AM)"
# Normalise to forward slashes inline so this cell does not depend on a
# helper defined in some other cell.
thepath = origpath.replace("\\", "/")
os.chdir(thepath)
if not thepath.endswith("/"):
    thepath += "/"
# glob returns names in arbitrary order; sort so that "every n-th file" is
# deterministic and follows the file names.
filelist = sorted(glob.glob(thepath + "*" + extension))
print("Original count: %d files." % len(filelist))
for counter, filename in enumerate(filelist):
    # Positions 0, multiple, 2*multiple, ... survive; everything else goes.
    if counter % multiple != 0:
        os.remove(filename)
newfilelist = glob.glob(thepath + "*" + extension)
print(" New count: %d files." % len(newfilelist))
# In[ ]:
#traverse #file #tree
def w2p(path):
    """Return ``path`` with every backslash replaced by a forward slash."""
    return path.replace("\\", "/")
import os
# Walk the package tree and build, for every .py file, its contents with a
# UTF-8 coding declaration prepended.
for root, dirs, files in os.walk(r"C:\Python_packages\lingpy-2"):
    for filename in files:
        if filename.endswith('.py'):
            filepath = os.path.join(root, filename)
            # Context manager so the handle is closed before the next file.
            with open(filepath, 'r') as f:
                contents = f.read()
            contents = "# -*- coding: utf-8 -*-\n" + contents
            # Dry run: the write-back stays disabled. Uncomment to rewrite the file.
            #open(filepath, 'w').write(contents) # rewrites file