In [7]:
import pandas as pd

# Load the log as one subject string per line.
# The file is read line by line instead of via pd.read_csv(sep="\n"):
# newer pandas releases reject "\n" as a separator with a ValueError, and a
# CSV parser adds quoting/field rules that a plain list of lines does not need.
with open("../../linux/gitsubject.log", encoding="utf-8") as log_file:
    # Strip the line terminator and skip empty lines (read_csv also skipped
    # blank lines by default).
    subjects = [line.rstrip("\r\n") for line in log_file if line.strip("\r\n")]

# Keep `data` as a single-column frame labelled 0 (what header=None produced),
# so `s` is still a Series named 0.
data = pd.DataFrame({0: subjects})
s = data[0]
s


Out[7]:
0                                            Linux-4.13-rc5
1                   mtd-blkdevs-Fix-mtd-block-write-failure
2         MD-not-clear-safemode-for-external-metadata-array
3                  pnfs-blocklayout-require-64-bit-sector_t
4              selftests-timers-freq-step-fix-compile-error
5         iommu-arm-smmu-fix-null-pointer-dereference-in...
6         xen-events-Fix-interrupt-lost-during-irq_disab...
7                              xen-avoid-deadlock-in-xenbus
8                      xen-fix-hvm-guest-with-kaslr-enabled
9                     xen-split-up-xen_hvm_init_shared_info
10          x86-provide-an-init_mem_mapping-hypervisor-hook
11        fuse-set-mapping-error-in-writepage_locked-whe...
12        userfaultfd-replace-ENOSPC-with-ESRCH-in-case-...
13        zram-rework-copy-of-compressor-name-in-comp_al...
14        rmap-do-not-call-mmu_notifier_invalidate_page-...
15              mm-fix-list-corruptions-on-shmem-shrinklist
16        mm-balloon_compaction.c-don-t-zero-ballooned-p...
17          MAINTAINERS-copy-virtio-on-balloon_compaction.c
18                               mm-fix-KSM-data-corruption
19        mm-fix-MADV_-FREE-DONTNEED-TLB-flush-miss-problem
20                         mm-make-tlb_flush_pending-global
21                            mm-refactor-TLB-gathering-API
22        Revert-mm-numa-defer-TLB-flush-for-THP-migrati...
23         mm-migrate-fix-barriers-around-tlb_flush_pending
24        mm-migrate-prevent-racy-access-to-tlb_flush_pe...
25        fault-inject-fix-wrong-should_fail-decision-in...
26        test_kmod-fix-small-memory-leak-on-filesystem-...
27         test_kmod-fix-the-lock-in-register_test_dev_kmod
28        test_kmod-fix-bug-which-allows-negative-values...
29               test_kmod-fix-spelling-mistake-EMTPY-EMPTY
                                ...                        
642461    PATCH-ppc32-fix-single-stepping-of-emulated-in...
642462    PATCH-ppc32-oops-on-kernel-altivec-assist-exce...
642463            PATCH-ppc32-improve-timebase-sync-for-SMP
642464    PATCH-ppc32-ppc4xx_pic-add-acknowledge-when-en...
642465    PATCH-ppc32-fix-bogosity-in-process-freezing-code
642466    PATCH-SELinux-add-support-for-NETLINK_KOBJECT_...
642467                           PATCH-irda_device-oops-fix
642468                           PATCH-Fix-dst_destroy-race
642469    PATCH-net-don-t-call-kmem_cache_create-with-a-...
642470                       PATCH-Fix-linux-atalk.h-header
642471             PATCH-meminfo-add-Cached-underflow-check
642472    PATCH-end_buffer_write_sync-avoid-pointless-as...
642473            PATCH-vmscan-pageout-remove-unneeded-test
642474    PATCH-oom-killer-disable-for-iscsi-lvm2-multip...
642475    PATCH-filemap_getpage-can-block-when-MAP_NONBL...
642476    PATCH-r128_state.c-break-missing-in-switch-sta...
642477    PATCH-SELinux-fix-bug-in-Netlink-message-type-...
642478    PATCH-fix-Bug-4395-modprobe-bttv-freezes-the-c...
642479                        PATCH-Fix-get_compat_sigevent
642480               PATCH-fix-crash-in-entry.S-restore_all
642481                                   PATCH-Fix-acl-Oops
642482    PATCH-re-export-cancel_rearming_delayed_workqueue
642483    PATCH-crypto-call-zlib-end-functions-on-deflat...
642484      PATCH-arm-add-comment-about-max_low_pfn-max_pfn
642485            PATCH-arm-add-comment-about-dma_supported
642486                  PATCH-arm-fix-help-text-for-ixdp465
642487                        PATCH-arm-fix-SIGBUS-handling
642488    PATCH-Avoid-deadlock-in-sync_page_io-by-using-...
642489                              PATCH-mmtimer-build-fix
642490                                     Linux-2.6.12-rc2
Name: 0, Length: 642491, dtype: object

In [10]:
# Extract the leading token of every subject (the text before the first "-").
# n=1 stops after the first dash, so the expanded frame has at most two
# columns instead of one column per dash-separated word; column 0 is the same
# as with an unlimited split.
first = s.str.split("-", n=1, expand=True)[0]
first


Out[10]:
0               Linux
1                 mtd
2                  MD
3                pnfs
4           selftests
5               iommu
6                 xen
7                 xen
8                 xen
9                 xen
10                x86
11               fuse
12        userfaultfd
13               zram
14               rmap
15                 mm
16                 mm
17        MAINTAINERS
18                 mm
19                 mm
20                 mm
21                 mm
22             Revert
23                 mm
24                 mm
25              fault
26          test_kmod
27          test_kmod
28          test_kmod
29          test_kmod
             ...     
642461          PATCH
642462          PATCH
642463          PATCH
642464          PATCH
642465          PATCH
642466          PATCH
642467          PATCH
642468          PATCH
642469          PATCH
642470          PATCH
642471          PATCH
642472          PATCH
642473          PATCH
642474          PATCH
642475          PATCH
642476          PATCH
642477          PATCH
642478          PATCH
642479          PATCH
642480          PATCH
642481          PATCH
642482          PATCH
642483          PATCH
642484          PATCH
642485          PATCH
642486          PATCH
642487          PATCH
642488          PATCH
642489          PATCH
642490          Linux
Name: 0, Length: 642491, dtype: object

In [12]:
# Tally how often each leading token occurs; value_counts() lists the most
# frequent first. Counting is case-sensitive, so variants such as
# "staging"/"Staging" and "usb"/"USB" appear as separate entries.
prefix_counts = first.value_counts()
prefix_counts


Out[12]:
ARM                          35445
drm                          34192
staging                      31581
PATCH                        23955
media                        16047
x86                          13752
net                          12397
ASoC                         12328
ALSA                         10886
V4L                           9459
perf                          8809
Staging                       8731
SCSI                          8511
powerpc                       8468
usb                           7334
MIPS                          7268
KVM                           7180
USB                           6423
mm                            4917
iwlwifi                       4706
Bluetooth                     4578
drivers                       4329
Input                         4269
IB                            3931
mmc                           3862
Btrfs                         3837
mac80211                      3788
sh                            3774
PCI                           3683
ACPI                          3663
                             ...  
arp_tables                       1
vlan_dev                         1
McBSP                            1
O_DIRECT                         1
xenbus_dev                       1
redefine                         1
filp                             1
CONFIG_HIGHPTE                   1
IRQ.txt                          1
iov_iter_advance                 1
send_sigio_to_task               1
POWREPC                          1
Migration                        1
msm_sdcc                         1
generate                         1
protocol.h                       1
async_xor_val                    1
dw_spi                           1
account_steal_time               1
SRPT                             1
snd_mixer_oss_build_input        1
configfs.h                       1
af_rxrpc.h                       1
OneNand                          1
of_mmc_spi                       1
find_filesystem                  1
hx4700                           1
ik8                              1
eth1394                          1
8250_early                       1
Name: 0, Length: 8894, dtype: int64