In [1]:
%matplotlib inline
%load_ext sql
In [2]:
import itertools
import random
import json
from functools import reduce
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
In [3]:
sns.set(rc={"figure.figsize": (10, 6)})
sns.set_style("whitegrid")
sns.set_context("notebook")
In [4]:
def list_to_string(x, y):
    """Concatenate two strings with ", " in between.

    Intended as the binary reducer for ``functools.reduce`` when turning a
    sequence of strings into one comma-separated string.
    """
    return x + ", " + y
In [5]:
def get_dbconnect_string(json_path, db_interface="postgresql+psycopg2"):
    """Build a database connection URL from a JSON configuration file.

    Parameters
    ----------
    json_path : path to a JSON file containing an object with at least the
        keys ``user``, ``password``, ``host``, ``port`` and ``database``.
    db_interface : dialect/driver prefix placed in front of ``://``.

    Returns
    -------
    str of the form ``<db_interface>://user:password@host:port/database``.

    Raises
    ------
    KeyError if one of the required keys is missing from the file.
    """
    from urllib.parse import quote

    # Binary mode lets the json module detect the file's UTF encoding itself.
    with open(json_path, "rb") as fd:
        configuration = json.load(fd)

    # Percent-encode the credentials: characters such as "@", "/" or ":" in a
    # password would otherwise be read as URL delimiters and corrupt the URL.
    url_fields = dict(configuration)
    for key in ("user", "password"):
        if key in url_fields:
            url_fields[key] = quote(str(url_fields[key]), safe="")

    return db_interface + "://{user}:{password}@{host}:{port}/{database}".format(**url_fields)
db_configuration = get_dbconnect_string("database.json")
In [6]:
%%sql $db_configuration
SELECT MIN(id), MAX(id), COUNT(*) FROM posts
1 rows affected.
Out[6]:
min
max
count
1
2900000
2867507
In [7]:
# Unpack the single (min, max, count) row returned by the query on the posts table.
# NOTE(review): `_` is IPython's "most recent cell output" variable, so this cell only
# works when it is executed directly after the preceding %%sql cell. Running any other
# cell that displays a value in between will break it.
((min_id, max_id, nr_ids),) = _
print("Total posts: {} – From {} to {}".format(nr_ids, min_id, max_id))
Total posts: 2867507 – From 1 to 2900000
In [8]:
posts_columns = %sql SELECT column_name FROM information_schema.columns WHERE table_name = 'posts'
24 rows affected.
In [9]:
print("Columns in posts table:")
# Each result row is a one-element tuple holding a column name. str.join produces the
# same comma-separated text as the reduce() idiom, but also copes with an empty result
# (reduce without an initial value raises TypeError on an empty sequence).
print(", ".join(x[0] for x in posts_columns))
Columns in posts table:
id, created_at, updated_at, uploader_id, score, source, md5, rating, image_width, image_height, file_ext, parent_id, has_children, file_size, up_score, down_score, is_pending, is_flagged, is_deleted, is_banned, pixiv_id, bit_flags, file_url, scraped_at
In [10]:
scores = %sql SELECT score, COUNT(*) FROM posts GROUP BY score ORDER BY score DESC
256 rows affected.
In [11]:
worst_post = %sql SELECT id FROM posts WHERE score = (SELECT MIN(score) FROM posts)
1 rows affected.
In [12]:
best_post = %sql SELECT id FROM posts WHERE score = (SELECT MAX(score) FROM posts)
1 rows affected.
In [13]:
# Number of posts per score, indexed by the score itself so that the score ends up on
# the x axis; the y axis is logarithmic.
scores_frame = scores.DataFrame()
pd_count = scores_frame.set_index("score")["count"]
pd_count.plot(logy=True)
plt.ylabel("count")
plt.title("Post score distribution")
Out[13]:
Text(0.5,1,'Post score distribution')
In [14]:
pending_posts = %sql SELECT id FROM posts WHERE is_pending = true
flagged_posts = %sql SELECT id FROM posts WHERE is_flagged = true
deleted_posts = %sql SELECT id FROM posts WHERE is_deleted = true
banned_posts = %sql SELECT id FROM posts WHERE is_banned = true
0 rows affected.
31 rows affected.
158705 rows affected.
14648 rows affected.
In [15]:
def single_column_to_set(column):
    """Return the set of first elements of every row in ``column``.

    ``column`` is any iterable of indexable rows (e.g. one-element tuples).
    """
    return {row[0] for row in column}
# One set of post ids per status, in the fixed order: pending, flagged, deleted, banned.
posts_with_status = [
    single_column_to_set(status_rows)
    for status_rows in (pending_posts, flagged_posts, deleted_posts, banned_posts)
]
In [16]:
# Number of posts per status, in the same order as posts_with_status.
post_flag_count = np.array([len(post_ids) for post_ids in posts_with_status])
# log10 of the counts, used as bar heights. Empty statuses are clamped to 1 first so
# that they map to a height of 0 instead of -inf.
post_flag_count_log = np.log10(np.maximum(post_flag_count, 1))
In [17]:
# Bar chart of the log10 post counts per status; every bar is labelled with its real count.
status_positions = range(4)
rects = plt.bar(status_positions, post_flag_count_log)
plt.xticks(status_positions, ["pending", "flagged", "deleted", "banned"])
for rect, flag_count in zip(rects, post_flag_count):
    bar_center = rect.get_x() + rect.get_width() / 2
    plt.text(bar_center, rect.get_height(), flag_count, va="bottom", ha="center")
plt.ylabel("count")
# Drop the outermost automatic ticks and relabel the remaining ones as powers of ten,
# because the bar heights are log10 values.
yticks = plt.yticks()[0][1:-1]
plt.yticks(yticks, ["$10^{:d}$".format(int(tick)) for tick in yticks])
plt.title("Post status distribution")
Out[17]:
Text(0.5,1,'Post status distribution')
In [18]:
# posts_with_status is ordered (pending, flagged, deleted, banned), so index 2 holds the
# deleted post ids and index 3 the banned post ids; `&` is the set intersection.
print("There is a small intersection of deleted and banned posts: %d" % len(posts_with_status[2] & posts_with_status[3]))
print("What is even the difference between these two?")
There is a small intersection of deleted and banned posts: 281
What is even the difference between these two?
Wow, there are a lot of deleted posts. I should inspect them manually, along with their reasons for deletion.
But first, create a view filtered_posts
from which pending, flagged, deleted and banned posts are removed. Furthermore, remove child posts, images smaller than 512 pixels in both dimensions, and posts whose aspect ratio lies outside the range 1:2 to 2:1.
In [19]:
%%sql
CREATE OR REPLACE TEMPORARY VIEW filtered_posts AS
(SELECT * FROM posts
WHERE is_pending = false
AND is_flagged = false
AND is_deleted = false
AND is_banned = false
AND parent_id = 0
AND (image_width >= 512 OR image_height >= 512)
AND cast(image_width as double precision) / cast(image_height as double precision) BETWEEN 0.5 AND 2.0)
Done.
Out[19]:
[]
In [20]:
filtered_count = %sql SELECT COUNT(*) FROM filtered_posts
1 rows affected.
In [21]:
filtered_ext = %sql SELECT DISTINCT file_ext FROM filtered_posts
8 rows affected.
In [22]:
# filtered_count holds a single row with a single COUNT(*) value.
print("Now after filtering we have %d posts left with the following file extensions:" % filtered_count[0][0])
# str.join yields the same comma-separated text as the reduce() idiom, but does not
# raise TypeError when the query returns no rows.
print(", ".join(x[0] for x in filtered_ext))
Now after filtering we have 2261400 posts left with the following file extensions:
gif, jpeg, jpg, mp4, png, swf, webm, zip
In [23]:
%%sql
CREATE OR REPLACE TEMPORARY VIEW filtered_images AS
(SELECT * FROM filtered_posts
WHERE file_ext = 'jpeg'
OR file_ext = 'jpg'
OR file_ext = 'png')
Done.
Out[23]:
[]
In [24]:
%sql SELECT COUNT(*) FROM filtered_images
1 rows affected.
Out[24]:
count
2248054
In [25]:
rating_distribution = %sql SELECT date_trunc('month', created_at) AS month, rating, COUNT(*) FROM posts GROUP BY month, rating ORDER BY month ASC, rating DESC
rating_distribution = np.array(rating_distribution).reshape((150,3,3))
rating_distribution = pd.DataFrame(rating_distribution[:,:,2], columns=rating_distribution[0,:,1], index=rating_distribution[:,0,0], dtype=np.int)
450 rows affected.
In [26]:
# Stacked area chart of monthly uploads, one area per rating column.
rating_ax = rating_distribution.plot.area()
first_month, last_month = rating_distribution.index[0], rating_distribution.index[-1]
rating_ax.set_xlim(first_month, last_month)
# Legend entries are given in column order of rating_distribution.
rating_ax.legend(["safe", "questionable", "explicit"], title="Rating")
rating_ax.set_xlabel("Date of upload")
rating_ax.set_ylabel("Uploads per month")
rating_ax.set_title("Distribution of uploads over time, grouped by rating")
Out[26]:
Text(0.5,1,'Distribution of uploads over time, grouped by rating')
In [27]:
%%sql
CREATE OR REPLACE TEMPORARY VIEW filtered_tags AS
(SELECT tags.name, tag_count.count, tags.category
FROM
(SELECT tag_id, COUNT(post_id)
FROM tagged INNER JOIN filtered_images ON filtered_images.id = tagged.post_id
GROUP BY tag_id) tag_count
INNER JOIN
tags ON tags.id = tag_count.tag_id
ORDER BY tag_count.count DESC)
Done.
Out[27]:
[]
In [28]:
tag_count = %sql SELECT * FROM filtered_tags LIMIT 1000
1000 rows affected.
In [29]:
def list_count(most_popular_tags):
    """Print a ranked text table with the columns RANK, NAME, COUNT and CATEGORY.

    ``most_popular_tags`` is a sized sequence of ``(name, count, category)``
    triples. Rows are printed in the given order and numbered from 1; the rank
    column is as wide as the largest rank number.
    """
    rank_width = len(str(len(most_popular_tags)))
    row_template = "{: >%d} " % (rank_width) + "{: <30}│{: >10}│{: >9}"

    print(row_template.format("RANK", "NAME", "COUNT", "CATEGORY"))
    print("─" * (rank_width + 2) + "──────────────────────────────┼──────────┼─────────")
    for rank, (name, count, category) in enumerate(most_popular_tags, start=1):
        print(row_template.format(rank, name, count, category))
In [30]:
list_count(tag_count)
RANK NAME │ COUNT│ CATEGORY
────────────────────────────────────┼──────────┼─────────
1 1girl │ 1570662│ g
2 solo │ 1330532│ g
3 long_hair │ 1010799│ g
4 breasts │ 643865│ g
5 blush │ 639725│ g
6 short_hair │ 631936│ g
7 smile │ 619523│ g
8 multiple_girls │ 495898│ g
9 open_mouth │ 489534│ g
10 looking_at_viewer │ 471852│ g
11 touhou │ 453860│ y
12 blue_eyes │ 452116│ g
13 blonde_hair │ 447439│ g
14 brown_hair │ 417002│ g
15 skirt │ 400779│ g
16 hat │ 373040│ g
17 thighhighs │ 356799│ g
18 red_eyes │ 345543│ g
19 black_hair │ 345461│ g
20 2girls │ 323153│ g
21 large_breasts │ 319220│ g
22 ribbon │ 298167│ g
23 dress │ 278163│ g
24 gloves │ 277466│ g
25 hair_ornament │ 275270│ g
26 bow │ 265775│ g
27 original │ 264886│ y
28 1boy │ 263397│ g
29 brown_eyes │ 261569│ g
30 twintails │ 257282│ g
31 school_uniform │ 257190│ g
32 simple_background │ 235557│ g
33 underwear │ 235510│ g
34 green_eyes │ 221588│ g
35 monochrome │ 216897│ g
36 navel │ 213990│ g
37 blue_hair │ 212850│ g
38 sitting │ 211071│ g
39 panties │ 203581│ g
40 kantai_collection │ 201925│ y
41 animal_ears │ 201837│ g
42 medium_breasts │ 200486│ g
43 cleavage │ 195339│ g
44 highres │ 189066│ g
45 white_background │ 185333│ g
46 shirt │ 182840│ g
47 bad_id │ 177549│ g
48 bad_pixiv_id │ 176506│ g
49 weapon │ 175061│ g
50 purple_eyes │ 172128│ g
51 jewelry │ 171261│ g
52 very_long_hair │ 166802│ g
53 hair_ribbon │ 166785│ g
54 long_sleeves │ 166278│ g
55 closed_eyes │ 164014│ g
56 bare_shoulders │ 160870│ g
57 nipples │ 160053│ g
58 comic │ 159294│ g
59 ponytail │ 154448│ g
60 bangs │ 152521│ g
61 purple_hair │ 152396│ g
62 greyscale │ 150274│ g
63 pink_hair │ 148938│ g
64 black_legwear │ 148648│ g
65 yellow_eyes │ 146350│ g
66 hair_bow │ 141352│ g
67 tail │ 138327│ g
68 flower │ 137893│ g
69 silver_hair │ 137002│ g
70 swimsuit │ 132065│ g
71 wings │ 130350│ g
72 hairband │ 127424│ g
73 ass │ 127263│ g
74 braid │ 126649│ g
75 pantyhose │ 123887│ g
76 boots │ 123315│ g
77 red_hair │ 123158│ g
78 green_hair │ 121470│ g
79 ahoge │ 114770│ g
80 glasses │ 114374│ g
81 serafuku │ 113955│ g
82 japanese_clothes │ 113283│ g
83 one_eye_closed │ 111272│ g
84 detached_sleeves │ 110976│ g
85 :d │ 108633│ g
86 food │ 106316│ g
87 male_focus │ 105690│ g
88 heart │ 105021│ g
89 holding │ 102801│ g
90 multiple_boys │ 101691│ g
91 nude │ 100683│ g
92 necktie │ 99315│ g
93 full_body │ 98903│ g
94 barefoot │ 98411│ g
95 standing │ 98391│ g
96 lying │ 94325│ g
97 collarbone │ 90710│ g
98 bikini │ 89913│ g
99 sword │ 88556│ g
100 shoes │ 88323│ g
101 pleated_skirt │ 87158│ g
102 white_hair │ 85481│ g
103 jacket │ 85398│ g
104 tears │ 84937│ g
105 midriff │ 82444│ g
106 sweat │ 82306│ g
107 small_breasts │ 82206│ g
108 sky │ 81894│ g
109 striped │ 81675│ g
110 upper_body │ 81541│ g
111 elbow_gloves │ 80400│ g
112 short_sleeves │ 80297│ g
113 3girls │ 80031│ g
114 white_legwear │ 78684│ g
115 censored │ 78282│ g
116 eyebrows_visible_through_hair │ 77927│ g
117 frills │ 75683│ g
118 looking_back │ 73862│ g
119 earrings │ 73327│ g
120 fang │ 71008│ g
121 vocaloid │ 69221│ y
122 alternate_costume │ 68993│ g
123 open_clothes │ 68570│ g
124 idolmaster │ 68414│ y
125 pointy_ears │ 68314│ g
126 hair_between_eyes │ 67790│ g
127 hairclip │ 67020│ g
128 day │ 66901│ g
129 shorts │ 66679│ g
130 cat_ears │ 66633│ g
131 pussy │ 65655│ g
132 penis │ 65416│ g
133 translated │ 62641│ g
134 cloud │ 62487│ g
135 2boys │ 62350│ g
136 fate_(series) │ 61959│ y
137 tongue │ 61950│ g
138 choker │ 61806│ g
139 solo_focus │ 60935│ g
140 belt │ 59819│ g
141 pink_eyes │ 59346│ g
142 cowboy_shot │ 59206│ g
143 scarf │ 57923│ g
144 yuri │ 56588│ g
145 chibi │ 56585│ g
146 hetero │ 56541│ g
147 fingerless_gloves │ 56297│ g
148 horns │ 55980│ g
149 flat_chest │ 55545│ g
150 cape │ 55354│ g
151 uniform │ 55253│ g
152 side_ponytail │ 55223│ g
153 zettai_ryouiki │ 54723│ g
154 sketch │ 54634│ g
155 puffy_sleeves │ 53606│ g
156 closed_mouth │ 53458│ g
157 star │ 53315│ g
158 socks │ 52378│ g
159 kimono │ 52234│ g
160 hair_flower │ 51979│ g
161 water │ 51794│ g
162 aqua_eyes │ 51708│ g
163 white_panties │ 51702│ g
164 multicolored_hair │ 50624│ g
165 twin_braids │ 50295│ g
166 black_eyes │ 50131│ g
167 armor │ 49927│ g
168 character_name │ 49546│ g
169 cum │ 49467│ g
170 parted_lips │ 49332│ g
171 outdoors │ 49288│ g
172 artist_request │ 49013│ g
173 white_gloves │ 48174│ g
174 sidelocks │ 47714│ g
175 spread_legs │ 47537│ g
176 :o │ 47468│ g
177 orange_hair │ 46429│ g
178 thighs │ 46283│ g
179 hatsune_miku │ 45989│ c
180 sleeveless │ 45835│ g
181 bag │ 45474│ g
182 bunny_ears │ 44657│ g
183 armpits │ 44631│ g
184 wide_sleeves │ 44616│ g
185 bra │ 44330│ g
186 huge_breasts │ 44298│ g
187 sweatdrop │ 44151│ g
188 necklace │ 43866│ g
189 on_back │ 43603│ g
190 apron │ 43530│ g
191 grin │ 43393│ g
192 feet │ 42736│ g
193 teeth │ 42727│ g
194 miniskirt │ 42640│ g
195 from_behind │ 42209│ g
196 covered_nipples │ 41918│ g
197 hood │ 41629│ g
198 copyright_request │ 41454│ g
199 tongue_out │ 41399│ g
200 book │ 41066│ g
201 artist_name │ 40585│ g
202 sex │ 40578│ g
203 vest │ 40510│ g
204 mahou_shoujo_madoka_magica │ 40037│ y
205 military │ 39903│ g
206 hakurei_reimu │ 39238│ c
207 hug │ 39098│ g
208 dark_skin │ 38667│ g
209 aqua_hair │ 38661│ g
210 lips │ 38626│ g
211 legs │ 38411│ g
212 maid │ 37994│ g
213 4girls │ 37959│ g
214 white_shirt │ 37892│ g
215 tree │ 37800│ g
216 idolmaster_cinderella_girls │ 37799│ y
217 black_gloves │ 37768│ g
218 bracelet │ 37715│ g
219 gun │ 37687│ g
220 kirisame_marisa │ 37635│ c
221 grey_hair │ 37492│ g
222 kneehighs │ 36737│ g
223 two_side_up │ 36335│ g
224 open_shirt │ 36012│ g
225 pants │ 35460│ g
226 cat_tail │ 35336│ g
227 hair_over_one_eye │ 35185│ g
228 absurdres │ 35164│ g
229 dated │ 34988│ g
230 petals │ 34946│ g
231 cosplay │ 34857│ g
232 high_heels │ 34829│ g
233 signature │ 34730│ g
234 shiny │ 34698│ g
235 magical_girl │ 34531│ g
236 kneeling │ 34375│ g
237 military_uniform │ 34307│ g
238 blunt_bangs │ 34013│ g
239 wrist_cuffs │ 33476│ g
240 ascot │ 33416│ g
241 one-piece_swimsuit │ 33330│ g
242 plaid │ 33175│ g
243 pokemon │ 33098│ y
244 dutch_angle │ 32717│ g
245 bowtie │ 32561│ g
246 witch_hat │ 32238│ g
247 vaginal │ 31842│ g
248 arms_up │ 31710│ g
249 fruit │ 31572│ g
250 blood │ 31311│ g
251 collar │ 31103│ g
252 nail_polish │ 31081│ g
253 traditional_media │ 31049│ g
254 hand_on_hip │ 31024│ g
255 maid_headdress │ 30899│ g
256 no_bra │ 30895│ g
257 pantyshot │ 30882│ g
258 sash │ 30732│ g
259 siblings │ 30724│ g
260 loli │ 30723│ g
261 headphones │ 30697│ g
262 6+girls │ 30563│ g
263 eyebrows │ 30491│ g
264 wet │ 30384│ g
265 no_panties │ 30372│ g
266 sweater │ 30323│ g
267 torn_clothes │ 29553│ g
268 remilia_scarlet │ 28868│ c
269 striped_legwear │ 28546│ g
270 bed │ 28541│ g
271 twitter_username │ 28396│ g
272 cup │ 28320│ g
273 hair_tubes │ 28078│ g
274 holding_hands │ 28017│ g
275 umbrella │ 27874│ g
276 copyright_name │ 27769│ g
277 alternate_hairstyle │ 27648│ g
278 pillow │ 27645│ g
279 parody │ 27501│ g
280 underboob │ 27482│ g
281 looking_at_another │ 27452│ g
282 headband │ 27344│ g
283 ^_^ │ 27245│ g
284 bottomless │ 27188│ g
285 single_braid │ 26955│ g
286 mole │ 26930│ g
287 izayoi_sakuya │ 26883│ c
288 sailor_collar │ 26839│ g
289 orange_eyes │ 26755│ g
290 drill_hair │ 26612│ g
291 headgear │ 26526│ g
292 sideboob │ 26460│ g
293 precure │ 26430│ y
294 bodysuit │ 26403│ g
295 white_dress │ 26285│ g
296 arm_up │ 26157│ g
297 v │ 25966│ g
298 bat_wings │ 25942│ g
299 saliva │ 25935│ g
300 no_humans │ 25931│ g
301 off_shoulder │ 25873│ g
302 mosaic_censoring │ 25695│ g
303 chain │ 25641│ g
304 indoors │ 25638│ g
305 capelet │ 25591│ g
306 :3 │ 25501│ g
307 moon │ 25432│ g
308 flandre_scarlet │ 25407│ c
309 see-through │ 25113│ g
310 neckerchief │ 25094│ g
311 double_bun │ 25048│ g
312 cover │ 25034│ g
313 groin │ 25028│ g
314 rose │ 24934│ g
315 school_swimsuit │ 24782│ g
316 from_side │ 24673│ g
317 translation_request │ 24588│ g
318 glowing │ 24570│ g
319 profile │ 24491│ g
320 lingerie │ 24436│ g
321 eyepatch │ 24380│ g
322 idolmaster_(classic) │ 24270│ y
323 wariza │ 24190│ g
324 pussy_juice │ 24160│ g
325 jojo_no_kimyou_na_bouken │ 24013│ y
326 thigh_boots │ 23950│ g
327 black_panties │ 23943│ g
328 pubic_hair │ 23886│ g
329 puffy_short_sleeves │ 23833│ g
330 mouth_hold │ 23761│ g
331 cameltoe │ 23657│ g
332 black_skirt │ 23593│ g
333 topless │ 23572│ g
334 shiny_skin │ 23506│ g
335 english │ 23098│ g
336 fate/grand_order │ 22952│ y
337 makeup │ 22931│ g
338 mask │ 22884│ g
339 leotard │ 22814│ g
340 hair_bobbles │ 22796│ g
341 bird │ 22740│ g
342 sparkle │ 22662│ g
343 night │ 22625│ g
344 bell │ 22486│ g
345 love_live! │ 22357│ y
346 holding_weapon │ 22305│ g
347 dress_shirt │ 22254│ g
348 leaf │ 22143│ g
349 hoodie │ 22080│ g
350 coat │ 21808│ g
351 hat_ribbon │ 21751│ g
352 side-tie_bikini │ 21742│ g
353 skirt_lift │ 21719│ g
354 v-shaped_eyebrows │ 21604│ g
355 face │ 21527│ g
356 blush_stickers │ 21430│ g
357 eating │ 21397│ g
358 striped_panties │ 21334│ g
359 pokemon_(game) │ 21204│ y
360 window │ 21188│ g
361 turtleneck │ 21102│ g
362 bdsm │ 21066│ g
363 5girls │ 21048│ g
364 bare_legs │ 20996│ g
365 couple │ 20931│ g
366 eyelashes │ 20927│ g
367 alice_margatroid │ 20900│ c
368 cat │ 20873│ g
369 plaid_skirt │ 20861│ g
370 shirt_lift │ 20805│ g
371 bound │ 20731│ g
372 symbol-shaped_pupils │ 20722│ g
373 black_dress │ 20714│ g
374 head_tilt │ 20505│ g
375 patchouli_knowledge │ 20414│ c
376 fox_ears │ 20368│ g
377 arm_support │ 20349│ g
378 leaning_forward │ 20217│ g
379 short_twintails │ 20205│ g
380 two-tone_hair │ 20182│ g
381 fox_tail │ 19966│ g
382 undressing │ 19898│ g
383 katana │ 19878│ g
384 formal │ 19834│ g
385 underwear_only │ 19812│ g
386 younger │ 19737│ g
387 blue_skirt │ 19686│ g
388 sleeping │ 19682│ g
389 crossed_arms │ 19627│ g
390 bandages │ 19513│ g
391 skirt_set │ 19442│ g
392 back │ 19409│ g
393 grey_eyes │ 19384│ g
394 gradient │ 19373│ g
395 outstretched_arms │ 19228│ g
396 fate/stay_night │ 19144│ y
397 heterochromia │ 19026│ g
398 grabbing │ 19020│ g
399 blazer │ 18995│ g
400 beret │ 18952│ g
401 knee_boots │ 18887│ g
402 embarrassed │ 18879│ g
403 from_above │ 18856│ g
404 crossover │ 18831│ g
405 sandals │ 18706│ g
406 tokin_hat │ 18675│ g
407 stuffed_toy │ 18654│ g
408 frown │ 18647│ g
409 wavy_mouth │ 18591│ g
410 bondage │ 18566│ g
411 garter_straps │ 18526│ g
412 love_live!_school_idol_project│ 18510│ y
413 chinese_clothes │ 18470│ g
414 yakumo_yukari │ 18463│ c
415 cirno │ 18453│ c
416 grey_background │ 18448│ g
417 kochiya_sanae │ 18422│ c
418 k-on! │ 18320│ y
419 sisters │ 18290│ g
420 blue_sky │ 18286│ g
421 cum_in_pussy │ 18161│ g
422 polearm │ 18117│ g
423 tattoo │ 18104│ g
424 collared_shirt │ 18096│ g
425 on_side │ 18091│ g
426 konpaku_youmu │ 18088│ c
427 chair │ 18074│ g
428 light_smile │ 18061│ g
429 anus │ 17940│ g
430 doujinshi │ 17897│ g
431 arms_behind_back │ 17764│ g
432 happy │ 17698│ g
433 pokemon_(creature) │ 17655│ g
434 wolf_ears │ 17642│ g
435 ocean │ 17639│ g
436 fingernails │ 17535│ g
437 mob_cap │ 17509│ g
438 obi │ 17497│ g
439 lipstick │ 17464│ g
440 red_bow │ 17464│ g
441 blurry │ 17415│ g
442 areolae │ 17407│ g
443 remodel_(kantai_collection) │ 17312│ g
444 gradient_hair │ 17309│ g
445 animal │ 17242│ g
446 red_ribbon │ 17180│ g
447 gradient_background │ 17125│ g
448 breast_grab │ 17102│ g
449 game_cg │ 17095│ g
450 genderswap │ 17031│ g
451 third_eye │ 17017│ g
452 thigh_gap │ 16999│ g
453 suspenders │ 16964│ g
454 facial_hair │ 16956│ g
455 cover_page │ 16953│ g
456 admiral_(kantai_collection) │ 16874│ c
457 crescent │ 16826│ g
458 short_shorts │ 16808│ g
459 expressionless │ 16669│ g
460 panty_pull │ 16666│ g
461 stuffed_animal │ 16638│ g
462 casual │ 16616│ g
463 akemi_homura │ 16610│ c
464 ;d │ 16572│ g
465 fangs │ 16540│ g
466 beach │ 16536│ g
467 mole_under_eye │ 16485│ g
468 child │ 16483│ g
469 speech_bubble │ 16458│ g
470 suit │ 16446│ g
471 cherry_blossoms │ 16413│ g
472 helmet │ 16391│ g
473 bent_over │ 16388│ g
474 shadow │ 16342│ g
475 fire │ 16328│ g
476 crop_top │ 16298│ g
477 official_art │ 16297│ g
478 multiple_tails │ 16258│ g
479 bike_shorts │ 16245│ g
480 shinkaisei-kan │ 16235│ g
481 knife │ 16181│ g
482 3boys │ 16147│ g
483 uncensored │ 16116│ g
484 scan │ 16105│ g
485 wind │ 16075│ g
486 otoko_no_ko │ 16050│ g
487 antenna_hair │ 16044│ g
488 blue_dress │ 16044│ g
489 from_below │ 16040│ g
490 polka_dot │ 15979│ g
491 final_fantasy │ 15926│ y
492 instrument │ 15926│ g
493 hands │ 15902│ g
494 wavy_hair │ 15887│ g
495 staff │ 15876│ g
496 >_< │ 15839│ g
497 red_neckwear │ 15825│ g
498 fujiwara_no_mokou │ 15784│ c
499 lyrical_nanoha │ 15761│ y
500 black_footwear │ 15508│ g
501 toes │ 15424│ g
502 hong_meiling │ 15402│ c
503 crossed_legs │ 15389│ g
504 stomach │ 15369│ g
505 pointing │ 15353│ g
506 backpack │ 15339│ g
507 shameimaru_aya │ 15333│ c
508 gundam │ 15295│ y
509 4koma │ 15295│ g
510 letterboxed │ 15288│ g
511 facial_mark │ 15278│ g
512 scar │ 15222│ g
513 oral │ 15166│ g
514 short_dress │ 15157│ g
515 no_pants │ 15099│ g
516 no_headwear │ 15030│ g
517 flying_sweatdrops │ 15019│ g
518 checkered │ 15015│ g
519 head_wings │ 15014│ g
520 kaname_madoka │ 14997│ c
521 watermark │ 14987│ g
522 strapless │ 14974│ g
523 text │ 14968│ g
524 convenient_censoring │ 14822│ g
525 :< │ 14815│ g
526 fur_trim │ 14764│ g
527 looking_away │ 14747│ g
528 pink_panties │ 14689│ g
529 trembling │ 14639│ g
530 sleeves_past_wrists │ 14625│ g
531 nature │ 14622│ g
532 detached_collar │ 14532│ g
533 sunlight │ 14516│ g
534 cum_on_body │ 14514│ g
535 monster_girl │ 14458│ g
536 buttons │ 14452│ g
537 cleavage_cutout │ 14387│ g
538 looking_to_the_side │ 14345│ g
539 feathers │ 14337│ g
540 depth_of_field │ 14319│ g
541 /\/\/\ │ 14292│ g
542 covering │ 14285│ g
543 gauntlets │ 14200│ g
544 foreshortening │ 14186│ g
545 suzumiya_haruhi_no_yuuutsu │ 14170│ y
546 world_witches_series │ 14157│ y
547 no_hat │ 14149│ g
548 table │ 14129│ g
549 kiss │ 14069│ g
550 bed_sheet │ 14069│ g
551 upskirt │ 14022│ g
552 reisen_udongein_inaba │ 14022│ c
553 outstretched_arm │ 14017│ g
554 ground_vehicle │ 14005│ g
555 komeiji_koishi │ 14001│ c
556 side-tie_panties │ 14000│ g
557 grass │ 13944│ g
558 fan │ 13887│ g
559 girls_und_panzer │ 13876│ y
560 tan │ 13857│ g
561 surprised │ 13840│ g
562 squatting │ 13801│ g
563 goggles │ 13648│ g
564 all_fours │ 13648│ g
565 mary_janes │ 13633│ g
566 broom │ 13614│ g
567 butterfly │ 13583│ g
568 microphone │ 13528│ g
569 pink_bow │ 13487│ g
570 blouse │ 13466│ g
571 adapted_costume │ 13461│ g
572 towel │ 13407│ g
573 red_skirt │ 13378│ g
574 full_moon │ 13370│ g
575 pale_skin │ 13297│ g
576 breast_press │ 13260│ g
577 twin_drills │ 13259│ g
578 phone │ 13257│ g
579 on_stomach │ 13250│ g
580 denim │ 13225│ g
581 looking_up │ 13222│ g
582 thigh_strap │ 13216│ g
583 bottle │ 13198│ g
584 bunnysuit │ 13197│ g
585 pov │ 13182│ g
586 plant │ 13180│ g
587 floral_print │ 13169│ g
588 lace │ 13156│ g
589 musical_note │ 13133│ g
590 saigyouji_yuyuko │ 13028│ c
591 crown │ 13018│ g
592 inubashiri_momiji │ 13006│ c
593 cardigan │ 12976│ g
594 genderswap_(mtf) │ 12972│ g
595 crying │ 12926│ g
596 granblue_fantasy │ 12911│ y
597 covered_navel │ 12859│ g
598 low_twintails │ 12850│ g
599 artoria_pendragon_(all) │ 12811│ c
600 peaked_cap │ 12806│ g
601 cross │ 12795│ g
602 dual_persona │ 12768│ g
603 komeiji_satori │ 12766│ c
604 blue_background │ 12689│ g
605 hair_bun │ 12630│ g
606 persona │ 12609│ y
607 bikini_top │ 12608│ g
608 hat_bow │ 12485│ g
609 ring │ 12480│ g
610 sheath │ 12479│ g
611 skindentation │ 12374│ g
612 angry │ 12308│ g
613 looking_down │ 12289│ g
614 muscle │ 12216│ g
615 red_dress │ 12173│ g
616 carrying │ 12132│ g
617 tank_top │ 12115│ g
618 machinery │ 12109│ g
619 parted_bangs │ 12108│ g
620 loafers │ 12106│ g
621 between_breasts │ 12097│ g
622 fellatio │ 12078│ g
623 mecha │ 12070│ g
624 scrunchie │ 12058│ g
625 shaded_face │ 11948│ g
626 heart-shaped_pupils │ 11907│ g
627 soles │ 11902│ g
628 lavender_hair │ 11893│ g
629 neon_genesis_evangelion │ 11873│ y
630 kemonomimi_mode │ 11843│ g
631 halterneck │ 11807│ g
632 front-tie_top │ 11794│ g
633 miki_sayaka │ 11785│ c
634 slit_pupils │ 11763│ g
635 pink_background │ 11757│ g
636 sleeveless_dress │ 11750│ g
637 wallpaper │ 11720│ g
638 open_jacket │ 11699│ g
639 fire_emblem │ 11692│ y
640 horn │ 11690│ g
641 one_side_up │ 11681│ g
642 demon_girl │ 11680│ g
643 headset │ 11645│ g
644 bob_cut │ 11612│ g
645 multiple_views │ 11612│ g
646 tiara │ 11594│ g
647 to_aru_majutsu_no_index │ 11586│ y
648 personification │ 11561│ g
649 glowing_eyes │ 11543│ g
650 nose_blush │ 11540│ g
651 cellphone │ 11499│ g
652 star_(sky) │ 11481│ g
653 snow │ 11398│ g
654 animal_print │ 11398│ g
655 :p │ 11322│ g
656 christmas │ 11319│ g
657 strap_slip │ 11276│ g
658 frilled_skirt │ 11257│ g
659 ? │ 11231│ g
660 fate/zero │ 11228│ y
661 scenery │ 11214│ g
662 neck_ribbon │ 11202│ g
663 bow_panties │ 11135│ g
664 ribbon_trim │ 11114│ g
665 straddling │ 11100│ g
666 floating_hair │ 11070│ g
667 girl_on_top │ 11021│ g
668 bare_arms │ 11015│ g
669 wet_clothes │ 10996│ g
670 long_legs │ 10989│ g
671 strike_witches │ 10985│ y
672 yakumo_ran │ 10970│ c
673 cum_on_upper_body │ 10955│ g
674 rumia │ 10919│ c
675 sunglasses │ 10919│ g
676 hand_up │ 10912│ g
677 armband │ 10898│ g
678 lolita_fashion │ 10850│ g
679 leg_up │ 10834│ g
680 anger_vein │ 10822│ g
681 rope │ 10808│ g
682 on_bed │ 10808│ g
683 brooch │ 10760│ g
684 tareme │ 10752│ g
685 kaga_(kantai_collection) │ 10740│ c
686 building │ 10738│ g
687 no_shoes │ 10718│ g
688 moriya_suwako │ 10687│ c
689 handgun │ 10646│ g
690 breast_hold │ 10630│ g
691 nontraditional_miko │ 10620│ g
692 kazami_yuuka │ 10598│ c
693 kagamine_rin │ 10567│ c
694 wolf_tail │ 10567│ g
695 sleeveless_shirt │ 10545│ g
696 santa_costume │ 10539│ g
697 hakama │ 10484│ g
698 corset │ 10447│ g
699 black_ribbon │ 10432│ g
700 sakura_kyouko │ 10408│ c
701 hand_on_own_chest │ 10397│ g
702 wristband │ 10395│ g
703 dual_wielding │ 10386│ g
704 facial │ 10360│ g
705 swept_bangs │ 10355│ g
706 tomoe_mami │ 10340│ c
707 china_dress │ 10305│ g
708 motor_vehicle │ 10301│ g
709 touken_ranbu │ 10284│ y
710 headwear_removed │ 10267│ g
711 saber │ 10251│ c
712 androgynous │ 10240│ g
713 drooling │ 10212│ g
714 pendant │ 10212│ g
715 candy │ 10204│ g
716 t-shirt │ 10194│ g
717 elf │ 10189│ g
718 curtains │ 10166│ g
719 konpaku_youmu_(ghost) │ 10143│ c
720 reiuji_utsuho │ 10106│ c
721 crossdressing │ 10071│ g
722 santa_hat │ 10052│ g
723 kamishirasawa_keine │ 10052│ c
724 forest │ 9966│ g
725 minigirl │ 9900│ g
726 clenched_teeth │ 9886│ g
727 ass_visible_through_thighs │ 9878│ g
728 multicolored │ 9862│ g
729 clenched_hand │ 9855│ g
730 bar_censor │ 9827│ g
731 white_bikini │ 9826│ g
732 skin_tight │ 9823│ g
733 half_updo │ 9812│ g
734 flying │ 9789│ g
735 sailor_dress │ 9768│ g
736 bespectacled │ 9766│ g
737 chen │ 9755│ c
738 o_o │ 9730│ g
739 highleg │ 9688│ g
740 bunny_tail │ 9678│ g
741 sweater_vest │ 9651│ g
742 rifle │ 9636│ g
743 gym_uniform │ 9615│ g
744 panties_under_pantyhose │ 9602│ g
745 kawashiro_nitori │ 9588│ c
746 breasts_outside │ 9574│ g
747 puffy_nipples │ 9571│ g
748 veil │ 9554│ g
749 tsurime │ 9545│ g
750 new_year │ 9545│ g
751 6+boys │ 9531│ g
752 box │ 9514│ g
753 cross-laced_footwear │ 9495│ g
754 licking │ 9470│ g
755 v_arms │ 9450│ g
756 claws │ 9439│ g
757 snake │ 9425│ g
758 standing_on_one_leg │ 9413│ g
759 bow_(weapon) │ 9387│ g
760 night_sky │ 9361│ g
761 buruma │ 9260│ g
762 fish │ 9234│ g
763 bridal_gauntlets │ 9173│ g
764 finger_to_mouth │ 9163│ g
765 folded_ponytail │ 9152│ g
766 gift │ 9114│ g
767 dress_lift │ 9113│ g
768 spear │ 9112│ g
769 teacup │ 9096│ g
770 red-framed_eyewear │ 9091│ g
771 waist_apron │ 9088│ g
772 twins │ 9065│ g
773 shimakaze_(kantai_collection) │ 9043│ c
774 cloudy_sky │ 9030│ g
775 cuffs │ 8999│ g
776 hinanawi_tenshi │ 8925│ c
777 shiny_hair │ 8907│ g
778 frilled_dress │ 8895│ g
779 tales_of_(series) │ 8887│ y
780 turret │ 8882│ g
781 anal │ 8879│ g
782 juliet_sleeves │ 8872│ g
783 cum_on_breasts │ 8862│ g
784 spot_color │ 8861│ g
785 hair_flaps │ 8834│ g
786 close-up │ 8823│ g
787 masturbation │ 8805│ g
788 ball │ 8793│ g
789 clothes_writing │ 8791│ g
790 hair_intakes │ 8757│ g
791 alcohol │ 8753│ g
792 bunny │ 8736│ g
793 fake_animal_ears │ 8728│ g
794 doujin_cover │ 8695│ g
795 skull │ 8691│ g
796 holding_sword │ 8687│ g
797 bubble │ 8684│ g
798 naughty_face │ 8647│ g
799 ... │ 8638│ g
800 jingle_bell │ 8628│ g
801 kaenbyou_rin │ 8609│ c
802 seiza │ 8601│ g
803 kill_la_kill │ 8591│ y
804 spiked_hair │ 8583│ g
805 older │ 8570│ g
806 sneakers │ 8568│ g
807 monster │ 8567│ g
808 cake │ 8558│ g
809 eye_contact │ 8552│ g
810 semi-rimless_eyewear │ 8543│ g
811 vertical_stripes │ 8533│ g
812 hat_removed │ 8532│ g
813 high_ponytail │ 8531│ g
814 halloween │ 8518│ g
815 black_wings │ 8495│ g
816 messy_hair │ 8493│ g
817 inazuma_eleven_(series) │ 8485│ y
818 lens_flare │ 8478│ g
819 partially_submerged │ 8477│ g
820 shield │ 8472│ g
821 tentacles │ 8465│ g
822 fishnets │ 8458│ g
823 breath │ 8446│ g
824 upside-down │ 8428│ g
825 card_(medium) │ 8413│ g
826 couch │ 8394│ g
827 abs │ 8386│ g
828 dog_ears │ 8384│ g
829 =_= │ 8326│ g
830 web_address │ 8326│ g
831 miko │ 8304│ g
832 tanline │ 8303│ g
833 kemono_friends │ 8266│ y
834 interlocked_fingers │ 8205│ g
835 rain │ 8195│ g
836 doll │ 8193│ g
837 wading │ 8176│ g
838 ice │ 8158│ g
839 sarashi │ 8126│ g
840 houraisan_kaguya │ 8087│ c
841 garter_belt │ 8077│ g
842 ribbed_sweater │ 8074│ g
843 kongou_(kantai_collection) │ 8002│ c
844 mahou_shoujo_lyrical_nanoha │ 7985│ y
845 geta │ 7966│ g
846 sleeves_rolled_up │ 7961│ g
847 steam │ 7947│ g
848 pink_dress │ 7943│ g
849 innertube │ 7893│ g
850 ghost │ 7887│ g
851 camisole │ 7869│ g
852 outstretched_hand │ 7868│ g
853 street_fighter │ 7862│ y
854 hair_over_shoulder │ 7850│ g
855 happy_birthday │ 7849│ g
856 side_slit │ 7840│ g
857 ofuda │ 7831│ g
858 shawl │ 7824│ g
859 hibiki_(kantai_collection) │ 7823│ c
860 wince │ 7808│ g
861 group_sex │ 7794│ g
862 mystia_lorelei │ 7793│ c
863 strapless_dress │ 7782│ g
864 koakuma │ 7775│ c
865 single_thighhigh │ 7772│ g
866 black_bikini │ 7763│ g
867 inaba_tewi │ 7759│ c
868 sunset │ 7744│ g
869 popsicle │ 7716│ g
870 starry_sky │ 7714│ g
871 tray │ 7705│ g
872 ibuki_suika │ 7699│ c
873 gem │ 7682│ g
874 hug_from_behind │ 7678│ g
875 track_jacket │ 7656│ g
876 border │ 7655│ g
877 yukata │ 7651│ g
878 brown_footwear │ 7649│ g
879 wide_hips │ 7648│ g
880 lucky_star │ 7646│ y
881 bandaid │ 7645│ g
882 frog │ 7598│ g
883 bloomers │ 7597│ g
884 brown_legwear │ 7593│ g
885 side_braid │ 7566│ g
886 hijiri_byakuren │ 7562│ c
887 smoke │ 7558│ g
888 green_skirt │ 7532│ g
889 megurine_luka │ 7522│ c
890 pantyshot_(sitting) │ 7495│ g
891 swimsuit_under_clothes │ 7492│ g
892 4boys │ 7485│ g
893 fingering │ 7485│ g
894 handjob │ 7456│ g
895 beard │ 7456│ g
896 inazuma_(kantai_collection) │ 7454│ c
897 paizuri │ 7453│ g
898 revealing_clothes │ 7450│ g
899 micro_bikini │ 7433│ g
900 hairpin │ 7425│ g
901 desk │ 7393│ g
902 fate/extra │ 7380│ y
903 contemporary │ 7352│ g
904 back-to-back │ 7350│ g
905 long_fingernails │ 7345│ g
906 dog │ 7341│ g
907 realistic │ 7337│ g
908 sharp_teeth │ 7336│ g
909 between_legs │ 7328│ g
910 underwater │ 7327│ g
911 zoom_layer │ 7306│ g
912 reflection │ 7300│ g
913 x_hair_ornament │ 7295│ g
914 gothic_lolita │ 7292│ g
915 jitome │ 7289│ g
916 pom_pom_(clothes) │ 7277│ g
917 ribbon-trimmed_sleeves │ 7265│ g
918 thick_thighs │ 7260│ g
919 akiyama_mio │ 7253│ c
920 testicles │ 7248│ g
921 mahou_shoujo_lyrical_nanoha_strikers│ 7223│ y
922 impossible_clothes │ 7220│ g
923 aircraft │ 7219│ g
924 pencil_skirt │ 7216│ g
925 spread_pussy │ 7182│ g
926 frilled_sleeves │ 7162│ g
927 top_hat │ 7155│ g
928 hitodama │ 7154│ g
929 low-tied_long_hair │ 7149│ g
930 bowl │ 7144│ g
931 hands_on_hips │ 7115│ g
932 after_sex │ 7098│ g
933 yasaka_kanako │ 7068│ c
934 tress_ribbon │ 7051│ g
935 pajamas │ 7041│ g
936 thong │ 7037│ g
937 monogatari_(series) │ 7032│ y
938 code_geass │ 7028│ y
939 gohei │ 7010│ g
940 tenryuu_(kantai_collection) │ 6994│ c
941 brother_and_sister │ 6985│ g
942 paw_pose │ 6976│ g
943 umineko_no_naku_koro_ni │ 6974│ y
944 tabard │ 6967│ g
945 hands_together │ 6963│ g
946 own_hands_together │ 6961│ g
947 everyone │ 6960│ g
948 curvy │ 6949│ g
949 running │ 6941│ g
950 antennae │ 6940│ g
951 legs_up │ 6933│ g
952 overwatch │ 6906│ y
953 epaulettes │ 6871│ g
954 sword_art_online │ 6861│ y
955 akagi_(kantai_collection) │ 6836│ c
956 baseball_cap │ 6835│ g
957 mole_under_mouth │ 6832│ g
958 smile_precure! │ 6818│ y
959 persona_4 │ 6814│ y
960 muneate │ 6811│ g
961 smirk │ 6806│ g
962 black_bra │ 6800│ g
963 light_brown_hair │ 6792│ g
964 piercing │ 6788│ g
965 kagamine_len │ 6777│ c
966 hair_rings │ 6773│ g
967 cloak │ 6771│ g
968 spread_arms │ 6767│ g
969 white_skirt │ 6754│ g
970 +_+ │ 6754│ g
971 butt_crack │ 6746│ g
972 |_| │ 6744│ g
973 mittens │ 6737│ g
974 curly_hair │ 6737│ g
975 walking │ 6729│ g
976 hair_down │ 6729│ g
977 goggles_on_head │ 6725│ g
978 blazblue │ 6721│ y
979 demon_tail │ 6717│ g
980 quaver │ 6711│ g
981 cigarette │ 6710│ g
982 ;) │ 6709│ g
983 zipper │ 6708│ g
984 arms_behind_head │ 6700│ g
985 injury │ 6683│ g
986 hand_on_another's_head │ 6682│ g
987 striped_bikini │ 6679│ g
988 kita_high_school_uniform │ 6665│ g
989 yagokoro_eirin │ 6662│ c
990 serious │ 6657│ g
991 angel_wings │ 6655│ g
992 school_bag │ 6653│ g
993 jumping │ 6640│ g
994 covering_breasts │ 6627│ g
995 cannon │ 6625│ g
996 clenched_hands │ 6622│ g
997 pose │ 6621│ g
998 half-closed_eyes │ 6618│ g
999 light_particles │ 6603│ g
1000 nazrin │ 6590│ c
In [31]:
def plot_count(most_popular_tags, steps, logx=True, n_annotations=9):
    """Plot tag counts against their rank and label a few randomly picked tags.

    Parameters
    ----------
    most_popular_tags : sequence of rows
        Each row is read as ``row[0]`` (tag name, used as annotation text) and
        ``row[1]`` (count, plotted on the y axis). The position of a row in
        the sequence is its rank, starting at 1.
    steps : int
        Spacing of the x ticks and width of the rank windows; one random tag
        per window is annotated.
    logx : bool, default True
        Despite its name this selects a logarithmic *y* axis
        (``plt.semilogy``); ``False`` draws on linear axes. The name is kept
        so that existing calls keep working.
    n_annotations : int, default 9
        Upper bound on the number of annotated tags. Fewer are drawn when the
        data holds fewer complete windows.

    Raises
    ------
    ValueError
        If ``most_popular_tags`` is empty.
    """
    pop_tag_number = len(most_popular_tags)
    if pop_tag_number == 0:
        raise ValueError("most_popular_tags must not be empty")
    pop_tag_count = tuple(row[1] for row in most_popular_tags)
    rank_range = list(range(1, pop_tag_number + 1))
    last_index = pop_tag_number - 1

    if logx:
        plt.semilogy(rank_range, pop_tag_count)
    else:
        plt.plot(rank_range, pop_tag_count)
    tick_positions = rank_range[steps - 1::steps]
    plt.xticks(tick_positions, tick_positions)
    plt.xlim(0, pop_tag_number)
    plt.ylabel("count")
    plt.xlabel("rank")

    # Heights at which the annotation texts are placed. The sample indices are
    # clamped to the last row so that short inputs no longer raise IndexError.
    if logx:
        annotate_line = np.logspace(np.log10(pop_tag_count[min(steps // 4, last_index)]),
                                    np.log10(pop_tag_count[-1]) * 1.1,
                                    n_annotations)
    else:
        annotate_line = np.linspace(pop_tag_count[0] * 0.9,
                                    pop_tag_count[min(steps * n_annotations, last_index)] * 1.1,
                                    n_annotations)

    # zip() stops at the shorter of the two, so at most n_annotations labels
    # are drawn and never more than there are windows.
    for logy, i in zip(annotate_line, range(0, pop_tag_number - steps, steps)):
        idx = random.randint(i, i + steps - 1)  # random row inside this window
        plt.annotate(most_popular_tags[idx][0],
                     (rank_range[idx], pop_tag_count[idx]),
                     (i + steps / 2, logy),
                     arrowprops={"arrowstyle": "-|>"})
    plt.title("Distribution of %d most popular tags" % pop_tag_number)
In [32]:
# Rank/count curve for the rows in tag_count, with a tick and one annotated tag
# per 100 ranks. logx keeps its default (True), which makes plot_count draw
# with plt.semilogy, i.e. a log-scaled count axis.
plot_count(tag_count, 100)
Very interesting! Even in log10 space these tags are not really linearly distributed. Let's see if we can solve this with preprocessing, or if there are some fancy tricks like weighting to take these differences in the distribution into account.
In [33]:
# List every tag name in tag_count that contains the substring "breast",
# keeping the order in which tag_count yields its rows.
print("Important! These are the top 1000 tags (in descending order) where the word breast is included:")
breast_list = [tag for tag, _, _ in tag_count if "breast" in tag]
# str.join also copes with an empty list; reduce() without an initial value
# raises TypeError in that case.
print(", ".join(breast_list))
print("In total these are %d tags!" % len(breast_list))
Important! These are the top 1000 tags (in descending order) where the word breast is included:
breasts, large_breasts, medium_breasts, small_breasts, huge_breasts, breast_grab, breast_press, between_breasts, breast_hold, breasts_outside, cum_on_breasts, covering_breasts
In total these are 12 tags!
In [34]:
def posts_with_tag(tagname):
tag_id = %sql SELECT id FROM tags WHERE name = :tagname
tag_id = tag_id[0][0]
post_id = %sql SELECT post_id FROM tagged WHERE tag_id = :tag_id
return post_id
In [35]:
# Example lookup: show the post ids tagged "headphones_on_breasts".
posts_with_tag("headphones_on_breasts")
1 rows affected.
9 rows affected.
Out[35]:
post_id
460734
472435
596058
1022243
1040917
1062730
1278123
1445451
2666173
In [36]:
# First 100 rows of filtered_tags in the artist category ('a').
# NOTE(review): the query has no ORDER BY, so the ranking shown afterwards
# relies on the order in which filtered_tags yields its rows. Confirm that the
# view is sorted by count.
artist_count = %sql SELECT * FROM filtered_tags WHERE category = 'a' LIMIT 100
100 rows affected.
In [37]:
# Print the artist rows as a RANK / NAME / COUNT / CATEGORY table.
list_count(artist_count)
RANK NAME │ COUNT│ CATEGORY
───────────────────────────────────┼──────────┼─────────
1 hammer_(sunset_beach) │ 3548│ a
2 haruyama_kazunori │ 2808│ a
3 itomugi-kun │ 2255│ a
4 kouji_(campus_life) │ 2020│ a
5 yohane │ 1908│ a
6 ichimi │ 1717│ a
7 a1 │ 1702│ a
8 carnelian │ 1491│ a
9 shimazaki_mujirushi │ 1419│ a
10 kirisawa_juuzou │ 1414│ a
11 shino_(ponjiyuusu) │ 1411│ a
12 tonda │ 1392│ a
13 warugaki_(sk-ii) │ 1390│ a
14 dd_(ijigendd) │ 1385│ a
15 ueyama_michirou │ 1361│ a
16 masao │ 1358│ a
17 yua_(checkmate) │ 1302│ a
18 tanaka_takayuki │ 1292│ a
19 futa_(nabezoko) │ 1276│ a
20 mizuki_makoto │ 1253│ a
21 peko │ 1214│ a
22 mizuhara_aki │ 1212│ a
23 kantoku │ 1206│ a
24 niiko_(gonnzou) │ 1162│ a
25 onikobe_rin │ 1127│ a
26 blade_(galaxist) │ 1123│ a
27 katahira_masashi │ 1122│ a
28 hisahiko │ 1115│ a
29 gofu │ 1105│ a
30 agahari │ 1071│ a
31 tsukudani_(coke-buta) │ 1071│ a
32 bai_lao_shu │ 1066│ a
33 mizumoto_tadashi │ 1065│ a
34 tsuda_nanafushi │ 1062│ a
35 karaagetarou │ 1054│ a
36 niwatazumi │ 1049│ a
37 yokochou │ 1031│ a
38 hoshizuki_(seigetsu) │ 1022│ a
39 drawfag │ 1012│ a
40 zounose │ 999│ a
41 shichimenchou │ 973│ a
42 abubu │ 967│ a
43 tima │ 962│ a
44 mikage_takashi │ 960│ a
45 tsunako │ 957│ a
46 minaba_hideo │ 952│ a
47 koto_inari │ 945│ a
48 maruki_(punchiki) │ 927│ a
49 fumio_(rsqkr) │ 925│ a
50 otohime_(youngest_princess) │ 924│ a
51 unya │ 922│ a
52 itou_noiji │ 909│ a
53 iesupa │ 905│ a
54 bkub │ 905│ a
55 eroe │ 900│ a
56 ha_akabouzu │ 891│ a
57 frapowa │ 889│ a
58 houtengeki │ 865│ a
59 gaoo_(frpjx283) │ 864│ a
60 ilya_kuvshinov │ 850│ a
61 suzuhira_hiro │ 846│ a
62 catstudioinc_(punepuni) │ 845│ a
63 matsunaga_kouyou │ 844│ a
64 creayus │ 833│ a
65 nishi_koutarou │ 829│ a
66 takeuchi_takashi │ 825│ a
67 clearite │ 822│ a
68 sayori │ 810│ a
69 tomose_shunsaku │ 808│ a
70 meow_(nekodenki) │ 806│ a
71 mattaku_mousuke │ 805│ a
72 kuromiya │ 802│ a
73 seo_tatsuya │ 796│ a
74 minami_(colorful_palette) │ 789│ a
75 mishima_kurone │ 781│ a
76 koyama_shigeru │ 779│ a
77 fuantei │ 771│ a
78 shirosato │ 768│ a
79 cis_(carcharias) │ 767│ a
80 cato_(monocatienus) │ 767│ a
81 six_(fnrptal1010) │ 764│ a
82 gogiga_gagagigo │ 761│ a
83 kawashina_(momen_silicon) │ 757│ a
84 gomasamune │ 757│ a
85 ikari_manatsu │ 755│ a
86 mitsumoto_jouji │ 749│ a
87 rebecca_(keinelove) │ 748│ a
88 manji_(tenketsu) │ 737│ a
89 ebifurya │ 736│ a
90 chado │ 735│ a
91 homare_(fool's_art) │ 733│ a
92 matsuryuu │ 728│ a
93 bow_(bhp) │ 727│ a
94 yume_shokunin │ 723│ a
95 urushihara_satoshi │ 722│ a
96 hamu_koutarou │ 721│ a
97 ameyama_denshin │ 720│ a
98 onija_tarou │ 717│ a
99 kara_(color) │ 712│ a
100 satou_kibi │ 710│ a
In [38]:
# Artist counts on linear axes (logx=False), with a tick and one annotated tag
# per 10 ranks.
plot_count(artist_count, 10, logx=False)
In [39]:
# First 100 rows of filtered_tags in the character category ('c').
# NOTE(review): no ORDER BY here. The ranking relies on the row order of
# filtered_tags; confirm that the view is sorted by count.
character_count = %sql SELECT * FROM filtered_tags WHERE category = 'c' LIMIT 100
100 rows affected.
In [40]:
# Print the character rows as a RANK / NAME / COUNT / CATEGORY table.
list_count(character_count)
RANK NAME │ COUNT│ CATEGORY
───────────────────────────────────┼──────────┼─────────
1 hatsune_miku │ 45989│ c
2 hakurei_reimu │ 39238│ c
3 kirisame_marisa │ 37635│ c
4 remilia_scarlet │ 28868│ c
5 izayoi_sakuya │ 26883│ c
6 flandre_scarlet │ 25407│ c
7 alice_margatroid │ 20900│ c
8 patchouli_knowledge │ 20414│ c
9 yakumo_yukari │ 18463│ c
10 cirno │ 18453│ c
11 kochiya_sanae │ 18422│ c
12 konpaku_youmu │ 18088│ c
13 admiral_(kantai_collection) │ 16874│ c
14 akemi_homura │ 16610│ c
15 fujiwara_no_mokou │ 15784│ c
16 hong_meiling │ 15402│ c
17 shameimaru_aya │ 15333│ c
18 kaname_madoka │ 14997│ c
19 reisen_udongein_inaba │ 14022│ c
20 komeiji_koishi │ 14001│ c
21 saigyouji_yuyuko │ 13028│ c
22 inubashiri_momiji │ 13006│ c
23 artoria_pendragon_(all) │ 12811│ c
24 komeiji_satori │ 12766│ c
25 miki_sayaka │ 11785│ c
26 yakumo_ran │ 10970│ c
27 rumia │ 10919│ c
28 kaga_(kantai_collection) │ 10740│ c
29 moriya_suwako │ 10687│ c
30 kazami_yuuka │ 10598│ c
31 kagamine_rin │ 10567│ c
32 sakura_kyouko │ 10408│ c
33 tomoe_mami │ 10340│ c
34 saber │ 10251│ c
35 konpaku_youmu_(ghost) │ 10143│ c
36 reiuji_utsuho │ 10106│ c
37 kamishirasawa_keine │ 10052│ c
38 chen │ 9755│ c
39 kawashiro_nitori │ 9588│ c
40 shimakaze_(kantai_collection) │ 9043│ c
41 hinanawi_tenshi │ 8925│ c
42 kaenbyou_rin │ 8609│ c
43 houraisan_kaguya │ 8087│ c
44 kongou_(kantai_collection) │ 8002│ c
45 hibiki_(kantai_collection) │ 7823│ c
46 mystia_lorelei │ 7793│ c
47 koakuma │ 7775│ c
48 inaba_tewi │ 7759│ c
49 ibuki_suika │ 7699│ c
50 hijiri_byakuren │ 7562│ c
51 megurine_luka │ 7522│ c
52 inazuma_(kantai_collection) │ 7454│ c
53 akiyama_mio │ 7253│ c
54 yasaka_kanako │ 7068│ c
55 tenryuu_(kantai_collection) │ 6994│ c
56 akagi_(kantai_collection) │ 6836│ c
57 kagamine_len │ 6777│ c
58 yagokoro_eirin │ 6662│ c
59 nazrin │ 6590│ c
60 houjuu_nue │ 6464│ c
61 nakano_azusa │ 6441│ c
62 ikazuchi_(kantai_collection) │ 6377│ c
63 morichika_rinnosuke │ 6225│ c
64 mizuhashi_parsee │ 6170│ c
65 souryuu_asuka_langley │ 6131│ c
66 hirasawa_yui │ 5961│ c
67 shanghai_doll │ 5921│ c
68 tatara_kogasa │ 5915│ c
69 shigure_(kantai_collection) │ 5896│ c
70 nagato_yuki │ 5860│ c
71 kagiyama_hina │ 5838│ c
72 fate_testarossa │ 5837│ c
73 kyubey │ 5634│ c
74 akatsuki_(kantai_collection) │ 5446│ c
75 hoshiguma_yuugi │ 5332│ c
76 wriggle_nightbug │ 5251│ c
77 suzumiya_haruhi │ 5242│ c
78 toosaka_rin │ 5212│ c
79 tainaka_ritsu │ 5197│ c
80 nagato_(kantai_collection) │ 5180│ c
81 toyosatomimi_no_miko │ 5165│ c
82 mononobe_no_futo │ 5014│ c
83 haruna_(kantai_collection) │ 4953│ c
84 yuudachi_(kantai_collection) │ 4925│ c
85 onozuka_komachi │ 4814│ c
86 takamachi_nanoha │ 4736│ c
87 northern_ocean_hime │ 4720│ c
88 fubuki_(kantai_collection) │ 4642│ c
89 zuikaku_(kantai_collection) │ 4589│ c
90 toramaru_shou │ 4480│ c
91 shiki_eiki │ 4462│ c
92 misaka_mikoto │ 4381│ c
93 nishikino_maki │ 4360│ c
94 daiyousei │ 4342│ c
95 matoi_ryuuko │ 4272│ c
96 nagae_iku │ 4139│ c
97 yazawa_nico │ 4048│ c
98 hamakaze_(kantai_collection) │ 4039│ c
99 kotobuki_tsumugi │ 4026│ c
100 atago_(kantai_collection) │ 4021│ c
In [41]:
# Character counts on linear axes (logx=False), with a tick and one annotated
# tag per 10 ranks.
plot_count(character_count, 10, logx=False)
In [42]:
# First 100 rows of filtered_tags in the series category ('y').
# NOTE(review): no ORDER BY here. The ranking relies on the row order of
# filtered_tags; confirm that the view is sorted by count.
series_count = %sql SELECT * FROM filtered_tags WHERE category = 'y' LIMIT 100
100 rows affected.
In [43]:
# Print the series rows as a RANK / NAME / COUNT / CATEGORY table.
list_count(series_count)
RANK NAME │ COUNT│ CATEGORY
───────────────────────────────────┼──────────┼─────────
1 touhou │ 453860│ y
2 original │ 264886│ y
3 kantai_collection │ 201925│ y
4 vocaloid │ 69221│ y
5 idolmaster │ 68414│ y
6 fate_(series) │ 61959│ y
7 mahou_shoujo_madoka_magica │ 40037│ y
8 idolmaster_cinderella_girls │ 37799│ y
9 pokemon │ 33098│ y
10 precure │ 26430│ y
11 idolmaster_(classic) │ 24270│ y
12 jojo_no_kimyou_na_bouken │ 24013│ y
13 fate/grand_order │ 22952│ y
14 love_live! │ 22357│ y
15 pokemon_(game) │ 21204│ y
16 fate/stay_night │ 19144│ y
17 love_live!_school_idol_project│ 18510│ y
18 k-on! │ 18320│ y
19 final_fantasy │ 15926│ y
20 lyrical_nanoha │ 15761│ y
21 gundam │ 15295│ y
22 suzumiya_haruhi_no_yuuutsu │ 14170│ y
23 world_witches_series │ 14157│ y
24 girls_und_panzer │ 13876│ y
25 granblue_fantasy │ 12911│ y
26 persona │ 12609│ y
27 neon_genesis_evangelion │ 11873│ y
28 fire_emblem │ 11692│ y
29 to_aru_majutsu_no_index │ 11586│ y
30 fate/zero │ 11228│ y
31 strike_witches │ 10985│ y
32 touken_ranbu │ 10284│ y
33 tales_of_(series) │ 8887│ y
34 kill_la_kill │ 8591│ y
35 inazuma_eleven_(series) │ 8485│ y
36 kemono_friends │ 8266│ y
37 mahou_shoujo_lyrical_nanoha │ 7985│ y
38 street_fighter │ 7862│ y
39 lucky_star │ 7646│ y
40 fate/extra │ 7380│ y
41 mahou_shoujo_lyrical_nanoha_strikers│ 7223│ y
42 monogatari_(series) │ 7032│ y
43 code_geass │ 7028│ y
44 umineko_no_naku_koro_ni │ 6974│ y
45 overwatch │ 6906│ y
46 sword_art_online │ 6861│ y
47 smile_precure! │ 6818│ y
48 persona_4 │ 6814│ y
49 blazblue │ 6721│ y
50 tengen_toppa_gurren_lagann │ 6586│ y
51 to_aru_kagaku_no_railgun │ 6549│ y
52 bishoujo_senshi_sailor_moon │ 6464│ y
53 rozen_maiden │ 6462│ y
54 tiger_&_bunny │ 6055│ y
55 league_of_legends │ 5763│ y
56 shingeki_no_kyojin │ 5706│ y
57 danganronpa │ 5682│ y
58 pokemon_sm │ 5547│ y
59 the_legend_of_zelda │ 5462│ y
60 higurashi_no_naku_koro_ni │ 5367│ y
61 inazuma_eleven_go │ 5342│ y
62 saki │ 5115│ y
63 ore_no_imouto_ga_konna_ni_kawaii_wake_ga_nai│ 5078│ y
64 fate/apocrypha │ 5016│ y
65 dragon_quest │ 5011│ y
66 macross │ 4949│ y
67 naruto │ 4846│ y
68 touhou_(pc-98) │ 4789│ y
69 splatoon │ 4613│ y
70 yuu-gi-ou │ 4568│ y
71 guilty_gear │ 4425│ y
72 pokemon_bw │ 4404│ y
73 idolmaster_million_live! │ 4365│ y
74 persona_3 │ 4328│ y
75 re:zero_kara_hajimeru_isekai_seikatsu│ 4294│ y
76 rebuild_of_evangelion │ 4269│ y
77 tsukihime │ 4210│ y
78 gochuumon_wa_usagi_desu_ka? │ 4193│ y
79 neptune_(series) │ 4173│ y
80 to_heart_2 │ 4111│ y
81 bakemonogatari │ 4075│ y
82 mahou_shoujo_lyrical_nanoha_a's│ 3990│ y
83 little_busters! │ 3976│ y
84 macross_frontier │ 3971│ y
85 dokidoki!_precure │ 3951│ y
86 yuru_yuri │ 3947│ y
87 axis_powers_hetalia │ 3919│ y
88 the_king_of_fighters │ 3901│ y
89 vampire_(game) │ 3896│ y
90 love_live!_sunshine!! │ 3860│ y
91 one_piece │ 3821│ y
92 heartcatch_precure! │ 3795│ y
93 pixiv_fantasia │ 3793│ y
94 splatoon_1 │ 3743│ y
95 clannad │ 3730│ y
96 black_rock_shooter │ 3683│ y
97 fire_emblem_if │ 3661│ y
98 ragnarok_online │ 3612│ y
99 puzzle_&_dragons │ 3591│ y
100 gundam_00 │ 3579│ y
In [44]:
# Series counts with a tick and one annotated tag per 10 ranks. logx keeps its
# default (True), so plot_count draws with plt.semilogy (log-scaled count axis).
plot_count(series_count, 10)
There are some tags that cannot be easily inferred from just looking at the images, that do not correspond to the use cases I have in mind, or that would train on useless features. Better to either remove such posts or just remove these tags.
I looked through the first 500 tags from the list above and took note of the following ones:
It might also be a good idea to remove the tags for series (category y), because I don't see how they would be useful. The same goes for artists, but since these are not in the TOP 1000 it should not be a problem either way.
In [45]:
%%sql
CREATE OR REPLACE TEMPORARY VIEW final_posts AS
-- Every filtered image except those carrying one of the excluded tags.
SELECT *
FROM filtered_images
WHERE id NOT IN (
    SELECT DISTINCT tagged.post_id
    FROM tagged
    WHERE tagged.tag_id IN (
        SELECT tags.id
        FROM tags
        WHERE tags.name IN ('comic', 'no_humans', '4koma', 'photo', '3d')
    )
)
ORDER BY id ASC
Done.
Out[45]:
[]
In [46]:
# Fetch id, rating and file extension of every row in the final_posts view.
# The Python variable deliberately shares its name with the SQL view; the two
# live in separate namespaces.
final_posts = %sql SELECT id, rating, file_ext FROM final_posts
2061570 rows affected.
In [47]:
%%sql
-- Per-tag usage inside final_posts, limited to tags used at least 10000 times
-- and then to the general category, most frequent first.
WITH tag_count AS (
    SELECT tagged.tag_id, COUNT(tagged.post_id) AS count
    FROM final_posts
    INNER JOIN tagged
        ON final_posts.id = tagged.post_id
    GROUP BY tagged.tag_id
    HAVING COUNT(tagged.post_id) >= 10000
)
SELECT tags.id, tags.name, tag_count.count
FROM tag_count
INNER JOIN tags
    ON tag_count.tag_id = tags.id
WHERE tags.category = 'g'
ORDER BY tag_count.count DESC
615 rows affected.
Out[47]:
id
name
count
4
1girl
1523817
36
solo
1314438
18
long_hair
951022
210
breasts
628220
8
blush
601991
59
smile
587298
184
short_hair
581007
20
looking_at_viewer
466356
412
open_mouth
451815
69
blue_eyes
442677
94
blonde_hair
436842
10
brown_hair
404691
180
multiple_girls
394516
35
skirt
379425
134
thighhighs
348087
28
red_eyes
337376
100
hat
334600
93
black_hair
332624
225
large_breasts
311943
30
ribbon
278870
169
2girls
270690
785
gloves
266168
45
dress
264096
99
hair_ornament
256368
427
brown_eyes
255377
9
bow
246489
114
twintails
241604
31
school_uniform
229344
87
underwear
229247
34
simple_background
226689
90
1boy
225371
407
green_eyes
217244
124
navel
210300
574
blue_hair
207161
383
sitting
201977
81
panties
198803
281
medium_breasts
195581
1344
cleavage
191663
606
animal_ears
188988
37
white_background
180013
289
shirt
173592
2285
highres
171385
20892
bad_id
171383
46537
bad_pixiv_id
170385
55
purple_eyes
169097
789
jewelry
165226
557
weapon
163563
438
very_long_hair
162347
232
nipples
156102
484
bare_shoulders
155967
147
hair_ribbon
154948
19
long_sleeves
154779
329
purple_hair
148267
6
black_legwear
146693
197
bangs
145232
465
pink_hair
145095
934
yellow_eyes
142528
84
ponytail
141141
358
closed_eyes
137079
57
silver_hair
133115
2496
flower
131860
649
tail
130156
14
hair_bow
129047
1649
swimsuit
128628
68
ass
124217
667
boots
120821
501
red_hair
119591
917
wings
119431
151
pantyhose
118776
74
green_hair
117396
1207
hairband
116572
526
braid
114917
491
one_eye_closed
106445
7818
male_focus
103141
305
ahoge
102941
2331
detached_sleeves
102704
97
glasses
101661
4902
japanese_clothes
100950
826
monochrome
99525
395
:d
99338
6166
heart
98451
183
serafuku
97791
48
holding
97469
46
full_body
97074
198
barefoot
96782
404
food
96370
462
nude
96132
61
standing
94281
953
necktie
92022
21
lying
90858
619
collarbone
88468
1629
bikini
88331
588
multiple_boys
87310
416
shoes
86116
555
sword
83992
3492
white_hair
82783
6546
jacket
80592
1896
midriff
80587
335
small_breasts
80370
153
pleated_skirt
79969
646
striped
79775
340
upper_body
78486
137
white_legwear
77954
784
elbow_gloves
77537
334
sky
76060
315
eyebrows_visible_through_hair
75110
813
censored
74547
4447
frills
74022
290
short_sleeves
73655
1139
tears
72525
79
looking_back
72269
783
earrings
71109
245
sweat
70034
1233
open_clothes
67010
4423
pointy_ears
65306
879
fang
65213
595
shorts
64656
314
day
64169
220
hair_between_eyes
63336
830
pussy
63318
616
cat_ears
62933
4764
alternate_costume
62731
1782
hairclip
61823
1994
penis
61295
243
solo_focus
60048
2100
choker
59712
653
tongue
58902
269
cowboy_shot
58741
2809
belt
58393
718
pink_eyes
58038
313
cloud
57784
445
3girls
57062
1025
flat_chest
54366
10907
fingerless_gloves
54178
2698
scarf
54034
571
2boys
53787
300
zettai_ryouiki
53402
1989
hetero
52955
833
sketch
52683
267
closed_mouth
51480
4275
cape
51471
43
chibi
51200
106
socks
51162
499
puffy_sleeves
51093
664
aqua_eyes
51051
138
white_panties
51044
6367
horns
50403
108
star
50212
474
yuri
50155
999
greyscale
49676
9202
hair_flower
49547
473
water
49137
4903
kimono
48663
669
character_name
48579
7024
black_eyes
48500
327
parted_lips
48245
792
multicolored_hair
48127
2117
cum
47573
471
side_ponytail
47377
3320
white_gloves
47275
521
armor
46953
308
artist_request
46541
1108
spread_legs
46403
326
outdoors
46101
652
thighs
45909
2066
twin_braids
45612
493
orange_hair
45183
5734
uniform
45086
193
:o
44368
481
armpits
44031
33
sidelocks
43666
4901
huge_breasts
43414
505
sleeveless
43263
1227
bra
43061
2504
necklace
42420
219
feet
42258
1399
on_back
42141
12081
wide_sleeves
41855
92
bag
41559
782
covered_nipples
41440
1711
bunny_ears
41309
282
miniskirt
41249
2631
grin
40882
817
from_behind
40402
38413
copyright_request
39663
654
tongue_out
39622
963
apron
39467
965
artist_name
38708
7435
hood
38492
759
aqua_hair
38093
1549
legs
38060
229
lips
37976
2711
sex
37944
1962
teeth
37883
296
vest
37558
9811
dark_skin
37475
733
book
37328
6153
black_gloves
36969
299
white_shirt
36928
13686
bracelet
36870
670
grey_hair
35648
175
hug
35567
1234
open_shirt
35375
16
kneehighs
35356
950
maid
34570
5380
gun
34476
1490
petals
34434
820
high_heels
34309
4919
shiny
34163
34460
absurdres
33894
15835
pants
33879
618
cat_tail
33645
532
dated
33587
1783
kneeling
33426
2345
two_side_up
33413
1870
signature
33193
391
tree
33158
2379
hair_over_one_eye
33100
2269
wrist_cuffs
32788
2053
one-piece_swimsuit
32646
338
translated
32514
3846
magical_girl
32308
4371
dutch_angle
32257
579
cosplay
31680
597
sweatdrop
31677
26
plaid
31418
264
blunt_bangs
31135
3664
nail_polish
30819
2637
no_bra
30487
8952
military
30471
352
bowtie
30430
2276
arms_up
30403
765
pantyshot
30268
2124
loli
30055
1307
hand_on_hip
29964
793
no_panties
29923
2714
vaginal
29892
8096
headphones
29813
4346
collar
29769
3037
ascot
29654
4918
sash
29632
406
fruit
29513
254
wet
29248
537
eyebrows
29152
1674
witch_hat
29106
1739
sweater
28799
185
siblings
28719
839
traditional_media
28518
8474
striped_legwear
27994
951
maid_headdress
27684
2343
torn_clothes
27679
529
copyright_name
27170
2091
underboob
27164
13655133
twitter_username
26538
3865
holding_hands
26532
2659
bottomless
26514
1167
sideboob
26124
310
blood
26082
8953
military_uniform
26078
2975
4girls
26074
201
bed
26060
1670
orange_eyes
25987
1210
pillow
25837
869
umbrella
25751
2723
bodysuit
25709
21821
white_dress
25590
2430
mole
25515
2274
alternate_hairstyle
25396
11320
off_shoulder
25339
509
v
25260
307
arm_up
25232
1992
mosaic_censoring
25163
7179
groin
24803
4584
see-through
24789
9635
chain
24705
6549
looking_at_another
24699
968
cup
24660
4898
headband
24622
429
hair_tubes
24580
33514
sailor_collar
24550
402
drill_hair
24451
3443
rose
24446
2699
single_braid
24185
12
capelet
24069
530
cover
23897
1403
saliva
23859
890
moon
23851
1230
lingerie
23837
316
from_side
23764
702
wariza
23740
1421
black_panties
23674
837
thigh_boots
23584
2059
school_swimsuit
23582
3104
6+girls
23560
224
indoors
23525
1096
cameltoe
23437
2252
bat_wings
23382
1194
mouth_hold
23261
4921
shiny_skin
23245
2796
pussy_juice
23227
2328
black_skirt
23054
237
pubic_hair
23012
976
profile
22999
1435
parody
22811
252
topless
22747
1987
double_bun
22625
3307
leotard
22540
280
makeup
22440
900
puffy_short_sleeves
22313
10137
glowing
21971
1644
side-tie_bikini
21528
548
holding_weapon
21463
1616
skirt_lift
21163
2630
dress_shirt
21057
2849
:3
21003
648
striped_panties
20993
4081
bell
20976
8255
night
20969
283
neckerchief
20926
98
hair_bobbles
20867
2332
eyepatch
20698
2500
leaf
20687
846
^_^
20660
28071
headgear
20636
8086
bare_legs
20619
16217
coat
20580
7436
hoodie
20477
857
hat_ribbon
20454
2181
turtleneck
20443
199
bdsm
20423
77462
eyelashes
20403
119
bird
20375
2087
shirt_lift
20368
1779
face
20323
19118
mask
20238
4475
black_dress
20113
5
arm_support
20078
319
head_tilt
19999
8468
plaid_skirt
19870
206
bound
19819
4452
leaning_forward
19788
24972
symbol-shaped_pupils
19669
3458
english
19428
85640
two-tone_hair
19419
8152
couple
19402
1877
underwear_only
19341
1482
fox_ears
19246
690
blue_skirt
19238
1765
sparkle
19234
1547
katana
19119
15934715
v-shaped_eyebrows
19064
7904
back
18988
1250
undressing
18922
986
window
18694
6169
knee_boots
18610
2943
grey_eyes
18596
2281
gradient
18579
1483
fox_tail
18531
3568
heterochromia
18387
818
garter_straps
18311
1063
from_above
18301
12075
skirt_set
18287
2355
cat
18195
1959
short_twintails
18189
3670
sandals
18158
311
blush_stickers
18143
2759
grey_background
18077
204
bondage
18034
6599
eating
17978
1693
outstretched_arms
17941
2324
bandages
17891
111
stuffed_toy
17879
4481
formal
17859
693
light_smile
17849
15838
tattoo
17733
6538
blazer
17708
457
grabbing
17626
272
frown
17596
25
on_side
17549
10827
beret
17469
1892
crossed_arms
17460
2756
cum_in_pussy
17435
187
sisters
17240
196
arms_behind_back
17231
3048
chinese_clothes
17195
195
areolae
17182
70536
blue_sky
17167
15102
lipstick
17153
9816
fingernails
17143
373
red_bow
17074
3503
game_cg
17065
4365
blurry
17046
1348
embarrassed
17012
5741
anus
16964
1511
polearm
16899
4911
obi
16881
650
thigh_gap
16843
903
red_ribbon
16745
136030
tokin_hat
16736
268
collared_shirt
16732
594
short_shorts
16551
385
sleeping
16517
1533
happy
16507
786
gradient_hair
16423
72416
wolf_ears
16422
2282
gradient_background
16413
1163
panty_pull
16317
531
cover_page
16126
1574
crossover
16125
9346
beach
16001
1420
bent_over
15988
109
stuffed_animal
15964
9127
crop_top
15952
4494
younger
15945
450
breast_grab
15915
2851
;d
15905
1479
cherry_blossoms
15904
1702
translation_request
15820
889
mob_cap
15753
1327
uncensored
15733
357
chair
15719
270
expressionless
15691
2186
wind
15679
13636
from_below
15671
18751
suspenders
15652
13199
ocean
15635
6151
bike_shorts
15627
2431
mole_under_eye
15582
524
blue_dress
15578
4169
official_art
15532
496
polka_dot
15524
8203
staff
15496
32
shadow
15492
37428
wavy_mouth
15489
2299
antenna_hair
15466
166327
third_eye
15460
374
red_neckwear
15449
3029
5girls
15445
2352
animal
15437
665
black_footwear
15369
4415
instrument
15286
3303
hands
15273
297
wavy_hair
15256
249
toes
15253
592
otoko_no_ko
15241
11791
stomach
15105
11936
crossed_legs
14997
10892
child
14979
8118
casual
14929
2991
crescent
14928
80
no_pants
14850
2948
knife
14840
30737
facial_mark
14824
832
short_dress
14822
15101
helmet
14796
4491
suit
14705
7031
letterboxed
14675
1648
strapless
14662
1486
multiple_tails
14585
2279
fire
14568
83
pink_panties
14524
12950
fur_trim
14410
4510
genderswap
14380
212
checkered
14375
8330
detached_collar
14333
31390752
remodel_(kantai_collection)
14268
2118
cum_on_body
14262
5459
scan
14189
454
convenient_censoring
14173
8934
cleavage_cutout
14126
9948
fangs
14112
6291
facial_hair
14107
4370
depth_of_field
14073
1937
looking_away
14006
10909
foreshortening
13944
2645
watermark
13912
11326
sleeves_past_wrists
13907
3342
feathers
13874
9326
covering
13852
12641
looking_to_the_side
13842
1993
oral
13833
11
buttons
13825
11422
backpack
13819
3545
side-tie_panties
13793
542
gauntlets
13776
113
sunlight
13761
136
upskirt
13727
18063
scar
13711
12923
head_wings
13639
2547
bed_sheet
13599
1322
tan
13502
1671
pointing
13462
39842
monster_girl
13446
6864
outstretched_arm
13435
3136
mary_janes
13366
4236
3boys
13300
413
pink_bow
13294
107
squatting
13272
34230
butterfly
13255
17550
adapted_costume
13169
1488
no_headwear
13134
8332
fan
13082
806
all_fours
13056
768
thigh_strap
13032
10743
bunnysuit
13013
29
red_skirt
13009
19656
floral_print
12969
9707
goggles
12964
1423
breast_press
12954
1812
lace
12926
7425
denim
12901
2131
pov
12879
1717
microphone
12860
2145
:<
12804
3348
pale_skin
12774
2049
covered_navel
12752
881
full_moon
12740
1529
blouse
12646
51172
cardigan
12591
4215
on_stomach
12552
4107
nature
12534
24856
looking_up
12488
1934
blue_background
12443
2941
crown
12440
3955
grass
12400
44
cross
12386
3849
pokemon_(creature)
12358
2153
broom
12336
1487
no_hat
12319
20861
skindentation
12308
84687
bikini_top
12246
472
towel
12208
1634
dual_persona
12169
9680
sheath
12149
3507
plant
11982
1927
twin_drills
11959
17892
red_dress
11955
3683
bottle
11865
36789
between_breasts
11851
1066
loafers
11830
361
ground_vehicle
11820
2008
hair_bun
11817
12642
low_twintails
11796
16151
soles
11762
1857
front-tie_top
11680
13999
halterneck
11650
556
text
11617
4511
genderswap_(mtf)
11615
19574
ring
11614
17803
looking_down
11568
6547
kiss
11562
1070
pink_background
11548
636
parted_bangs
11509
18715
lavender_hair
11506
2514
wallpaper
11475
24967
heart-shaped_pupils
11472
2102
demon_girl
11469
111143
headset
11442
9579
tank_top
11428
4014
multiple_views
11353
6527
tiara
11348
16056
open_jacket
11278
7027
bob_cut
11233
8160
muscle
11220
21763
phone
11189
24561
sleeveless_dress
11180
626
kemonomimi_mode
11162
7923
strap_slip
11158
2946
hat_bow
11126
4948
scrunchie
11121
1988
fellatio
11119
23079
frilled_skirt
11090
7319
:p
11079
1750
animal_print
11078
7726
christmas
11063
891
one_side_up
10959
23905
floating_hair
10950
1718
musical_note
10932
2238
long_legs
10925
5268
carrying
10904
981
table
10850
613
bow_panties
10819
31155946
shinkaisei-kan
10784
483
bare_arms
10782
10368
personification
10777
2120
cum_on_upper_body
10768
1439
straddling
10719
28541
leg_up
10672
11539
wet_clothes
10664
12144
slit_pupils
10658
8260
ribbon_trim
10641
4650
trembling
10567
23
no_shoes
10553
3634
armband
10551
3525
girl_on_top
10544
3364
lolita_fashion
10541
131
snow
10526
4038
breast_hold
10456
24
nose_blush
10452
74616
tareme
10450
32000
star_(sky)
10444
1339
>_<
10371
275
hand_up
10361
28500
on_bed
10278
1573
corset
10233
26556
black_ribbon
10206
6128
brooch
10177
3315
santa_costume
10152
5045
rope
10122
5384
handgun
10116
12687
wristband
10081
2742
facial
10054
248992
wolf_tail
10052
10138
glowing_eyes
10046
174
hand_on_own_chest
10003
In [48]:
# `_` is IPython's "output of the most recently executed cell". This line only
# captures the tag count result if it runs directly after the %%sql cell above;
# executing any other cell in between silently binds a different value.
final_tag_count = _
In [49]:
# Convert both SQL results into NumPy structured arrays with explicit column
# dtypes, rebinding the same two names.
final_posts = np.array(list(map(tuple, final_posts)),
                       dtype=[("id", np.int32), ("rating", "<U1"), ("file_ext", "<U4")])
# A fixed-width unicode column silently truncates longer strings, so the width
# of the name column is taken from the data instead of the hard-coded '<U29'.
tag_rows = list(map(tuple, final_tag_count))
name_width = max((len(row[1]) for row in tag_rows), default=1)
final_tag_count = np.array(tag_rows,
                           dtype=[("id", np.int32),
                                  ("name", "<U%d" % max(name_width, 1)),
                                  ("count", np.int32)])
In [51]:
# Store both tables in one HDF5 file under separate keys. mode="a" opens the
# file for appending, so the second call adds to the file written by the first.
# `key` is passed by keyword because recent pandas releases deprecate passing
# it positionally.
pd.DataFrame(final_posts).to_hdf("metadata.h5", key="posts", mode="a", complevel=9, complib="bzip2")
pd.DataFrame(final_tag_count).to_hdf("metadata.h5", key="tag_count", mode="a", complevel=9, complib="bzip2")
Content source: leyhline/WaifuNet
Similar notebooks: