In [20]:
from PIL import Image, ImageDraw, ImageFont
# from IPython.display import Image as Ipython_img
img_path = r"C:\Users\jiang\Desktop\pic.jpg"
font_path = "c:/Windows/fonts/SIMHEI.ttf"
picture = Image.open(img_path, mode='r')
fnt = ImageFont.truetype(font_path, 60)
draw = ImageDraw.Draw(picture)
draw.text((picture.size[0] - 40, 0), "4", font=fnt, fill=(255, 0, 0, 255))  # draw a red "4" in the top-right corner
picture.save(r"C:\Users\jiang\Desktop\test.jpg", 'jpeg')
picture # display
Out[20]:
In [3]:
import numpy as np, string
def gen_tickets(tickets_num, tickets_len):
    # draw random indices into the 26 uppercase letters (upper bound is exclusive)
    idx = np.random.randint(0, 26, tickets_num * tickets_len)
    letters = np.array(list(string.ascii_uppercase))
    s = letters[idx].tostring()
    return [s[i*tickets_len : (i+1)*tickets_len] for i in range(tickets_num)]
## Test
print gen_tickets(5, 100)
In [5]:
import numpy as np, string
def gen_a_ticket(tickets_len):
    # generator that yields one random ticket code of the given length
    idx = np.random.randint(0, 26, tickets_len)  # 26 uppercase letters, upper bound exclusive
    letters = np.array(list(string.ascii_uppercase))
    s = letters[idx].tostring()
    yield s

import MySQLdb
db = MySQLdb.connect("localhost", "root", "******", "database_for_python")
cursor = db.cursor()
cursor.execute("DROP TABLE IF EXISTS python_tickets")
sql = """CREATE TABLE python_tickets(
    num INT,
    ticket_code CHAR(100))"""
cursor.execute(sql)
for i in range(1, 201):
    ticket = gen_a_ticket(100).next()
    sql = "INSERT INTO python_tickets VALUES ('%d', '%s')" % (i, ticket)
    cursor.execute(sql)
db.commit()
db.close()
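Building the INSERT with string formatting is safe here only because the ticket codes contain nothing but uppercase letters; MySQLdb also accepts parameterized queries, where the driver handles quoting. A minimal sketch, assuming the same hypothetical database, credentials and table as above:

import MySQLdb
db = MySQLdb.connect("localhost", "root", "******", "database_for_python")
cursor = db.cursor()
# %s is MySQLdb's placeholder style; values are passed separately and escaped by the driver
for i, ticket in enumerate(["ABCD", "WXYZ"], start=1):  # hypothetical pre-generated codes
    cursor.execute("INSERT INTO python_tickets VALUES (%s, %s)", (i, ticket))
db.commit()
db.close()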
In [17]:
import numpy as np, string
def gen_a_ticket(tickets_len):
    # generator that yields one random ticket code of the given length
    idx = np.random.randint(0, 26, tickets_len)  # 26 uppercase letters, upper bound exclusive
    letters = np.array(list(string.ascii_uppercase))
    s = letters[idx].tostring()
    yield s

import redis
rd = redis.Redis(host='localhost', port=6379, db=0)
for i in range(1, 201):
    ticket = gen_a_ticket(100).next()
    rd.set(i, ticket)
rd.save()
# test
print rd.get(55)
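Each of the 200 SET calls above pays a network round trip; redis-py also provides a pipeline that buffers commands and sends them in one batch. A minimal sketch, assuming the same local Redis instance (rd) and the gen_a_ticket generator defined above:

pipe = rd.pipeline()                       # buffer commands client-side
for i in range(1, 201):
    pipe.set(i, gen_a_ticket(100).next())
pipe.execute()                             # flush the whole batch to the server at once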
In [41]:
#coding=utf-8
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
vectorizer = CountVectorizer()
text_path = r"C:\Users\jiang\Desktop\Github_Pages.txt" #utf-8 without BOM
with open(text_path, 'r') as f:
    s = f.readlines()
X = vectorizer.fit_transform(s)
words = vectorizer.get_feature_names()
counts = np.sum(X.toarray(), 0).tolist()
results = sorted(zip(words, counts), key=lambda x: x[1], reverse=True)
for word, count in results:  # print results
    print word, count
In [9]:
from PIL import Image
import os
imgs_path = r"C:\Users\jiang\Desktop\pictures"
for root, dirs, files in os.walk(imgs_path):
    for f in files:
        img = Image.open(os.path.join(root, f))
        img.resize((64, 100)).save(os.path.join(root, 'rs_' + f), 'jpeg')
In [130]:
from bs4 import BeautifulSoup
import urllib2
def get_links_from_url(url):
    html_page = urllib2.urlopen(url)
    links = BeautifulSoup(html_page).findAll('a')
    links = [i.get('href') for i in links if i.get('href') and not i.get('href').startswith('javascript:')]  # drop javascript: pseudo-links
    proto, rest = urllib2.splittype(url)
    domain = urllib2.splithost(rest)[0]
    links = map(lambda i: proto + '://' + domain + i if i[0] == '/' else url + i if i[0] == '#' else i, links)  # turn relative links into absolute ones
    return links
def get_article_links():
    url = "http://www.jianshu.com/"
    links = get_links_from_url(url)
    links = [i for i in links if i.startswith('http://www.jianshu.com/c/')]  # category pages
    links_class = list(set(links))
    links_article = list()
    for link in links_class:
        links = get_links_from_url(link)
        links = [i for i in links if i.startswith('http://www.jianshu.com/p/') and not i.endswith('#comments')]  # article pages
        links = list(set(links))
        links_article.extend(links)
    links_article = list(set(links_article))
    return links_article
from goose import Goose
from goose.text import StopWordsChinese
import os
def save_articles_from_links(links, articles_path):
    g = Goose({'stopwords_class': StopWordsChinese})
    for url in links:
        article = g.extract(url=url)
        # keep only CJK characters, digits and ASCII letters in the title so it makes a valid file name
        valid_title = map(lambda i: " " if not (
            (u'\u4e00' <= i <= u'\u9fff') or
            (u'\u0030' <= i <= u'\u0039') or
            (u'\u0041' <= i <= u'\u005a') or
            (u'\u0061' <= i <= u'\u007a')) else i, article.title)
        valid_title = ''.join(valid_title)
        article_file = os.path.join(articles_path, valid_title + '.txt')
        with open(article_file, 'w') as f:
            f.write(article.cleaned_text.encode('utf-8'))
# extract articles from jianshu.com and save them to local files
articles_path = r"C:\Users\jiang\Desktop\articles"
links = get_article_links()
save_articles_from_links(links, articles_path)
# rank word importance with TF-IDF
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
import jieba
import os
articles_path = r"C:\Users\jiang\Desktop\articles"
corpus = []
titles = []
for root, dirs, files in os.walk(articles_path):
    for title in files:
        titles.append(title)
        file_name = os.path.join(root, title)
        with open(file_name) as f:
            s = f.read()
        words = jieba.lcut(s, cut_all=True)  # segment the Chinese text
        s = ' '.join(words)
        corpus.append(s)
vectorizer = CountVectorizer()
transformer = TfidfTransformer()
tfidf = transformer.fit_transform(vectorizer.fit_transform(corpus))
word = vectorizer.get_feature_names()
weight = tfidf.toarray()
for i in range(len(weight)):
    print "-------------------------------------------------------------------------------------"
    print "文章", i, ":", titles[i].decode('gbk')
    tmp = sorted(zip(word, weight[i]), key=lambda x: x[1], reverse=True)
    print "最重要的词:"
    for j in range(5):
        print "word:", tmp[j][0], "TF-IDF:", tmp[j][1]
In [31]:
code_path = r'C:\Users\jiang\Documents\MATLAB' #matlab files
import os
file_count, code_line_count, blank_line_count, comment_line_count = 0, 0, 0, 0
for root, dirs, files in os.walk(code_path):
    for f in files:
        if f.endswith('.m'):
            file_count += 1
            with open(os.path.join(root, f), 'r') as cur_f:
                for line in cur_f:
                    if line.strip().startswith('%'):
                        comment_line_count += 1
                    elif line.isspace():
                        blank_line_count += 1
                    else:
                        code_line_count += 1
print "file_count, code_line_count, blank_line_count, comment_line_count: "
print file_count, code_line_count, blank_line_count, comment_line_count
In [23]:
from goose import Goose
from goose.text import StopWordsChinese
url = "http://www.jianshu.com/p/05cfea46e4fd"
html_doc = r"C:\Users\jiang\Desktop\2016年人工智能领域的总结与思考:未来将面临的五大考验 - 简书.html"
with open(html_doc.decode('utf8'), 'r') as f:
    words = f.read()
g = Goose({'stopwords_class': StopWordsChinese})
article = g.extract(raw_html=words)
print(article.title)
print(article.cleaned_text)
In [32]:
from bs4 import BeautifulSoup
import urllib2
url = "http://www.jianshu.com/p/05cfea46e4fd"
html_page = urllib2.urlopen(url)
links = BeautifulSoup(html_page).findAll('a')
links = [i.get('href') for i in links if i.get('href') and not i.get('href').startswith('javascript:')]  # drop javascript: pseudo-links
proto, rest = urllib2.splittype(url)
domain = urllib2.splithost(rest)[0]
links = map(lambda i: proto + '://' + domain + i if i[0] == '/' else url + i if i[0] == '#' else i, links)  # turn relative links into absolute ones
for link in links:
    print(link)
In [86]:
import string, random
from PIL import Image, ImageFont, ImageFilter, ImageDraw
def get_random_color():
    return tuple([random.randint(50, 150) for _ in range(3)])

def get_verify_picture():
    font_path = "c:/Windows/fonts/SIMHEI.ttf"
    letters = [random.choice(string.letters) for i in range(4)]  # four random ASCII letters
    font = ImageFont.truetype(font_path, 50)
    width, height = 240, 60
    pic = Image.new('RGB', (width, height), (200, 200, 200))
    draw = ImageDraw.Draw(pic)
    for i, letter in enumerate(letters):
        draw.text((60 * i + random.randrange(0, 20), random.randrange(0, 10)), letter, font=font, fill=get_random_color())
    for i in range(5000):  # sprinkle noise points over the canvas
        draw.point((random.randrange(width), random.randrange(height)), fill=get_random_color())
    return pic.filter(ImageFilter.BLUR)

get_verify_picture()
Out[86]:
In [ ]:
import sys, locale
filtered_words = r"C:\Users\jiang\Desktop\filtered_words.txt"
filtered_words_dict = {}
with open(filtered_words) as f:
    for line in f:
        word = line.strip()
        if not filtered_words_dict.has_key(word):
            filtered_words_dict[word] = True
while True:
    # normalize console input to utf-8 before looking it up
    if filtered_words_dict.has_key(raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)).encode('utf-8')):
        print("Freedom")
    else:
        print("Human Rights")
In [ ]:
import sys, locale
filtered_words = r"C:\Users\jiang\Desktop\filtered_words.txt"
filtered_words_dict = {}
with open(filtered_words) as f:
    for line in f:
        word = line.strip()
        if not filtered_words_dict.has_key(word):
            filtered_words_dict[word] = True
while True:
    s = raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)).encode('utf-8')
    # replace every occurrence of a filtered word with '*'
    for key in filtered_words_dict.keys():
        while s.find(key) != -1:
            start = s.find(key)
            s = s[: start] + '*' + s[start + len(key):]
    print s
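The nested find/slice loop can also be written as a single re.sub call. A minimal sketch, assuming a hypothetical words list built the same way as filtered_words_dict above (utf-8 byte strings in Python 2):

import re
words = ['北京', '程序员']  # hypothetical filtered words
pattern = re.compile('|'.join(re.escape(w) for w in words))

def censor(s):
    # every match of any filtered word becomes a single '*', like the loop above
    return pattern.sub('*', s)

print censor('北京的程序员')  # -> '*的*'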
In [14]:
import requests
import lxml.html
url = "http://tieba.baidu.com/p/2166231880"
path = "C:\\Users\\jiang\\Desktop\\imgs\\"
page = requests.get(url).text
doc = lxml.html.document_fromstring(page)
for idx, el in enumerate(doc.cssselect('img.BDE_Image')):
    with open(path + '%03d.jpg' % idx, 'wb') as f:
        f.write(requests.get(el.attrib['src']).content)
In [30]:
#encoding:utf-8
# convert with pandas
import pandas as pd
txt_path = r'C:\Users\jiang\Desktop\student.txt'
excel_path = r"C:\Users\jiang\Desktop\student.xls"
with open(txt_path) as f:
    s = eval(f.read(), {})
for v in s.values():
    for i in range(len(v)):
        if isinstance(v[i], basestring):
            v[i] = str(v[i]).decode('utf-8')  # decode every string in the dict from utf-8
s = pd.DataFrame(s).T
s.to_excel(excel_path, 'student', header=False)
In [2]:
#encoding:utf-8
# convert with pandas
import pandas as pd
txt_path = r'C:\Users\jiang\Desktop\city.txt'
excel_path = r"C:\Users\jiang\Desktop\city.xls"
with open(txt_path) as f:
    s = eval(f.read(), {})
for k, v in s.items():
    if isinstance(v, basestring):
        s[k] = str(v).decode('utf-8')  # decode every string in the dict from utf-8
s = pd.DataFrame(s, index=[0]).T
s.to_excel(excel_path, 'city', header=False)
In [16]:
#encoding:utf-8
# convert with pandas
import pandas as pd
txt_path = r'C:\Users\jiang\Desktop\numbers.txt'
excel_path = r"C:\Users\jiang\Desktop\numbers.xls"
with open(txt_path) as f:
    s = eval(f.read(), {})
s = pd.DataFrame(s)
s.to_excel(excel_path, 'numbers', header=False, index=False)
In [6]:
import xlrd  # read the xls file with xlrd
import json  # format the dict output with json.dumps()
xls_path = r'C:\Users\jiang\Desktop\student.xls'
xml_path = r"C:\Users\jiang\Desktop\student.xml"
data = xlrd.open_workbook(xls_path)
with open(xml_path, 'w') as f:
    f.write(r'<?xml version="1.0" encoding="UTF-8"?>')
    f.write("\n<root>\n")
    for sheet in data.sheets():
        f.write('<students>\n<!-- \n\t学生信息表\n\t"id" : [名字, 数学, 语文, 英文]\n-->\n')
        sheet_dict = {}
        for i in range(sheet.nrows):
            sheet_dict[sheet.cell_value(i, 0)] = [sheet.cell_value(i, j) for j in range(1, sheet.ncols)]
        s = json.dumps(sheet_dict, ensure_ascii=False, indent=4, sort_keys=True)
        f.write(s.encode('utf-8'))
        f.write("\n</students>\n")
    f.write("</root>")
In [5]:
import xlrd  # read the xls file with xlrd
import json  # format the dict output with json.dumps()
xls_path = r'C:\Users\jiang\Desktop\city.xls'
xml_path = r"C:\Users\jiang\Desktop\city.xml"
data = xlrd.open_workbook(xls_path)
with open(xml_path, 'w') as f:
    f.write(r'<?xml version="1.0" encoding="UTF-8"?>')
    f.write("\n<root>\n")
    for sheet in data.sheets():
        f.write('<citys>\n<!-- \n\t城市信息\n-->\n')
        sheet_dict = {}
        for i in range(sheet.nrows):
            sheet_dict[sheet.cell_value(i, 0)] = sheet.cell_value(i, 1)
        s = json.dumps(sheet_dict, ensure_ascii=False, indent=4, sort_keys=True)
        f.write(s.encode('utf-8'))
        f.write("\n</citys>\n")
    f.write("</root>")
In [9]:
import xlrd  # read the xls file with xlrd
import json  # format the list output with json.dumps()
xls_path = r'C:\Users\jiang\Desktop\numbers.xls'
xml_path = r"C:\Users\jiang\Desktop\numbers.xml"
data = xlrd.open_workbook(xls_path)
with open(xml_path, 'w') as f:
    f.write(r'<?xml version="1.0" encoding="UTF-8"?>')
    f.write("\n<root>\n")
    for sheet in data.sheets():
        f.write('<numbers>\n<!-- \n\t数字信息\n-->\n')
        sheet_list = []
        for i in range(sheet.nrows):
            sheet_list.append([sheet.cell_value(i, j) for j in range(sheet.ncols)])
        s = json.dumps(sheet_list, ensure_ascii=False, indent=4, sort_keys=True)
        f.write(s.encode('utf-8'))
        f.write("\n</numbers>\n")
    f.write("</root>")
In [1]:
import pandas as pd
xls_path = r'C:\Users\jiang\Desktop\201612_通话详单.xls'
df = pd.read_excel(xls_path.decode('utf-8'))
df = df.to_dict()
ts = df[u'通信时长'].values()
shi, fen, miao = 0, 0, 0
for t in ts:
    try:
        s = t.find(u'时')
        f = t.find(u'分')
        m = t.find(u'秒')
        if s != -1:
            shi += int(t[:s])
        if f != -1:
            fen += int(t[s+1:f])
        if m != -1:
            miao += int(t[f+1:m])
    except:
        pass  # skip cells that are not duration strings
fen += miao / 60   # carry seconds into minutes (integer division in Python 2)
miao %= 60
shi += fen / 60    # carry minutes into hours
fen %= 60
print("通信时长:%d时%d分%d秒" % (shi, fen, miao))
In [33]:
import os
from hashlib import sha256
from hmac import HMAC
def encrypt_password(password, salt=None):
    if salt is None:
        salt = os.urandom(8)
    result = password
    for i in xrange(10):
        # iterate the HMAC so each round feeds the previous digest back in
        result = HMAC(result, salt, sha256).digest()
    return salt + result

def validate_password(hashed, input_password):
    return hashed == encrypt_password(input_password, salt=hashed[:8])

my_password = '12345678'
hashed = encrypt_password(my_password)
print 'my_password:', my_password
print 'hashed: ', hashed
print validate_password(hashed, my_password)
print validate_password(hashed, '1234567')
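A side note on the comparison: == on two digests can leak timing information, and hmac.compare_digest (available since Python 2.7.7) performs a constant-time comparison instead. A minimal sketch of the same check, assuming the encrypt_password function above:

from hmac import compare_digest  # constant-time comparison, Python 2.7.7+

def validate_password_ct(hashed, input_password):
    # same logic as validate_password, but resistant to timing attacks
    return compare_digest(hashed, encrypt_password(input_password, salt=hashed[:8]))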
In [4]:
from PIL import Image
import os
imgs_path = r"C:\Users\jiang\Desktop\pictures"
picture_size = [64, 100]
def change_pic_size(imgs_path, picture_size):
    for root, dirs, files in os.walk(imgs_path):
        for f in files:
            img = Image.open(os.path.join(root, f))
            img.resize(picture_size).save(os.path.join(root, 'rs_' + f), 'jpeg')

change_pic_size(imgs_path, picture_size)
In [46]:
# Following the Flask tutorial you can build a small micro-blog app, which is close to this exercise: http://docs.jinkan.org/docs/flask/tutorial/index.html
# Take a look at the Flask quickstart first: http://docs.jinkan.org/docs/flask/quickstart.html
# Not interested in web development for now, so this exercise is only stubbed out
from flask import Flask
app = Flask(__name__)

@app.route('/')
def index():
    return 'Web 版本 留言簿...'

app.run()
In [20]:
# Not interested in web development for now, so this exercise is only stubbed out
from flask import Flask
app = Flask(__name__)

@app.route('/')
def index():
    return 'Web 版本 TodoList 应用...'

app.run()
In [45]:
import speech_recognition as sr
import webbrowser
MY_BING_KEY = "d8b094b351e6444e9e6ff8d922ac64ff" #key from Microsoft : https://www.microsoft.com/cognitive-services/en-us/speech-api
url = {"打开百度" : "www.baidu.com", "打开谷歌" : "www.google.com", "打开知乎" : "www.zhihu.com", "打开博客" : "www.cnblogs.com"}
r = sr.Recognizer()
stop = False
while not stop:
    with sr.Microphone() as source:
        print("正在聆听...")
        audio = r.listen(source)
    try:
        # send the recording to the Bing speech API and get back a utf-8 string
        text = r.recognize_bing(audio, key=MY_BING_KEY, language='zh-CN').encode('utf-8')
    except:
        text = None  # recognition failed or the service was unreachable
    if url.has_key(text):
        print("你的输入:" + text)
        webbrowser.open_new_tab(url[text])
    elif text == "停止":
        print("你的输入:" + text)
        stop = True
    elif text is None:
        print "抱歉,没听懂,请使用普通话。"
    else:
        print("你的输入:" + text + "【抱歉,仅支持: 打开百度、打开谷歌,打开知乎,打开博客】")