Evaluating Coccinelle Semantic Patches in linux-mainline

Setting up the notebook


In [1]:
# Import all needed libraries
import json
import calendar
import random
import os
import sys
from datetime import date, timedelta

import faker
import numpy as np
import pandas as pd
from pandas import DataFrame, read_csv
from delorean import parse
import matplotlib

# Enable inline plotting
%matplotlib inline

In [2]:
print('Python version ' + sys.version)
print('Pandas version ' + pd.__version__)
print('Matplotlib version ' + matplotlib.__version__)


Python version 3.6.3 (v3.6.3:2c5fed8, Oct  3 2017, 18:11:49) [MSC v.1900 64 bit (AMD64)]
Pandas version 0.20.3
Matplotlib version 2.1.0

Preparing the data


In [3]:
# Dictionary listing all Semantic Patches in linux-4.14-rc6
# TODO: Should refactor into file 'cocci_linux_4_14_rc6.json'

cocci_linux_4_14_rc6 = {
    'info': {
        'repository': 'linux-4.14-rc6'
    },
    'scripts': [
        {
            'path': 'scripts/coccinelle/api/alloc',
            'filename': 'alloc_cast.cocci',
            'confidence': 'High',
            'description': "Remove casting the values returned by memory allocation functions like "
                "kmalloc, kzalloc, kmem_cache_alloc, kmem_cache_zalloc etc. "
                "This makes an effort to find cases of casting of values returned "
                "by kmalloc, kzalloc, kcalloc, kmem_cache_alloc, kmem_cache_zalloc, "
                "kmem_cache_alloc_node, kmalloc_node and kzalloc_node and removes the casting "
                "as it is not required. The result in the patch case may need some reformatting."
        },
        {
            'path': 'scripts/coccinelle/api/alloc',
            'filename': 'kzalloc-simple.cocci',
            'confidence': 'High',
            'description': "Use kzalloc rather than kmalloc followed by memset with 0. "
                "This considers some simple cases that are common and easy to validate. "
                "Note in particular that there are no ...s in the rule, so all of "
                "the matched code has to be contiguous"
        },
        {
            'path': 'scripts/coccinelle/api/alloc',
            'filename': 'pool_zalloc-simple.cocci',
            'confidence': 'N.A.',
            'description': "Use *_pool_zalloc rather than *_pool_alloc followed by memset with 0"
        },
        # TODO: Complete fields for all the 59 entries
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'd_find_alias.cocci',
            'confidence': 'Moderate',
            'description': "Make sure calls to d_find_alias() have a corresponding call to dput()."
        },
        {
            'path': 'scripts/coccinelle/debugfs',
            'filename': 'debugfs_simple_attr.cocci',
            'confidence': 'N.A.',
            'description': "Use DEFINE_DEBUGFS_ATTRIBUTE rather than DEFINE_SIMPLE_ATTRIBUTE "
                "for debugfs files. "
                "Rationale: DEFINE_SIMPLE_ATTRIBUTE + debugfs_create_file() imposes some "
                "significant overhead as compared to DEFINE_DEBUGFS_ATTRIBUTE + debugfs_create_file_unsafe()."
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'drm-get-put.cocci',
            'confidence': 'High',
            'description': "Use drm_*_get() and drm_*_put() helpers instead of drm_*_reference() "
                "and drm_*_unreference() helpers."
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'err_cast.cocci',
            'confidence': 'High',
            'description': "Use ERR_CAST inlined function instead of ERR_PTR(PTR_ERR(...))"
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'kstrdup.cocci',
            'confidence': 'High',
            'description': "Use kstrdup rather than duplicating its implementation"
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'memdup.cocci',
            'confidence': 'High',
            'description': "Use kmemdup rather than duplicating its implementation"
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'memdup_user.cocci',
            'confidence': 'High',
            'description': "Use memdup_user rather than duplicating its implementation. "
                "This is a little bit restricted to reduce false positives"
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'platform_no_drv_owner.cocci',
            'confidence': 'High',
            'description': "Remove .owner field if calls are used which set it automatically"
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'pm_runtime.cocci',
            'confidence': 'Medium',
            'description': "Make sure pm_runtime_* calls does not use unnecessary IS_ERR_VALUE"
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'ptr_ret.cocci',
            'confidence': 'High',
            'description': "Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR"
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'resource_size.cocci',
            'confidence': 'High',
            'description': "Use resource_size function on resource object instead of explicit computation."
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'setup_timer.cocci',
            'confidence': 'High',
            'description': "Use setup_timer function instead of initializing timer with the function and data fields"
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'simple_open.cocci',
            'confidence': 'High',
            'description': "Remove an open coded simple_open() function "
                "and replace file operations references to the function with simple_open() instead."
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'vma_pages.cocci',
            'confidence': 'High',
            'description': "Use vma_pages function on vma object instead of explicit computation."
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'clk_put.cocci',
            'confidence': 'Medium',
            'description': "Find missing clk_puts. This only signals a missing clk_put when "
                "there is a clk_put later in the same function. "
                "False positives can be due to loops."
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'devm_free.cocci',
            'confidence': 'Moderate',
            'description': "Find uses of standard freeing functions on values allocated using devm_ functions. "
                "Values allocated using the devm_functions are freed when the device is detached, "
                "and thus the use of the standard freeing function would cause a double free. "
                "See Documentation/driver-model/devres.txt for more information. "
                "A difficulty of detecting this problem is that the standard freeing function might "
                "be called from a different function than the one containing the allocation function. "
                "It is thus necessary to make the connection between the allocation function "
                "and the freeing function. "
                "Here this is done using the specific argument text, which is prone to false positives. "
                "There is no rule for the request_region and request_mem_region variants because "
                "this heuristic seems to be a bit less reliable in these cases."
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'ifnullfree.cocci',
            'confidence': 'N.A.',
            'description': "NULL check before some freeing functions is not needed. "
                "Based on checkpatch warning \"kfree(NULL) is safe this check is probably "
                "not required\" and kfreeaddr.cocci"
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'iounmap.cocci',
            'confidence': 'Moderate',
            'description': "Find missing iounmaps. "
                "This only signals a missing iounmap when there is an iounmap later in the same function. "
                "False positives can be due to loops."
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'kfree.cocci',
            'confidence': 'Moderate',
            'description': "Find a use after free. "
                "Values of variables may imply that some execution paths are not possible, resulting in false positives. "
                "Another source of false positives are macros such as "
                "SCTP_DBG_OBJCNT_DEC that do not actually evaluate their argument"
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'kfreeaddr.cocci',
            'confidence': 'High',
            'description': "Free of a structure field"
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'pci_free_consistent.cocci',
            'confidence': 'Moderate',
            'description': "Find missing pci_free_consistent for every pci_alloc_consistent."
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'device_node_continue.cocci',
            'confidence': 'High',
            'description': "Device node iterators put the previous value of the index variable, "
                "so an explicit put causes a double put."
        },
        {
            'path': 'scripts/coccinelle/iterators',
            'filename': 'fen.cocci',
            'confidence': 'High',
            'description': "These iterators only exit normally when the loop cursor is NULL, "
                "so there is no point to call of_node_put on the final value."
        },
        {
            'path': 'scripts/coccinelle/iterators',
            'filename': 'ifnull.cocci',
            'confidence': 'Moderate',
            'description': "Many iterators have the property that the first argument "
                "is always bound to a real list element, never NULL. "
                "False positives arise for some iterators that do not have this property, "
                "or in cases when the loop cursor is reassigned. The latter should only "
                "happen when the matched code is on the way to a loop exit (break, goto, or return)."
        },
        {
            'path': 'scripts/coccinelle/iterators',
            'filename': 'list_entry_update.cocci',
            'confidence': 'High',
            'description': "list_for_each_entry uses its first argument to get "
                "from one element of the list to the next, so it is usually not a good idea to reassign it. "
                "The first rule finds such a reassignment and the second rule checks that there is "
                "a path from the reassignment back to the top of the loop."
        },
        {
            'path': 'scripts/coccinelle/iterators',
            'filename': 'use_after_iter.cocci',
            'confidence': 'Moderate',
            'description': "If list_for_each_entry, etc complete a traversal of the list, "
                "the iterator variable ends up pointing to an address at an offset from the list head, "
                "and not a meaningful structure. Thus this value should not be used after "
                "the end of the iterator. "
                "False positives arise when there is a goto in the iterator and the reported reference "
                "is at the label of this goto. Some flag tests may also cause a report to be a false positive."
        },
        {
            'path': 'scripts/coccinelle/locks',
            'filename': 'call_kern.cocci',
            'confidence': 'Moderate',
            'description': "Find functions that refer to GFP_KERNEL but are called with locks held. "
                "The proposed change of converting the GFP_KERNEL is not necessarily the correct one. "
                "It may be desired to unlock the lock, or to not call the function under the lock in the first place."
        },
        {
            'path': 'scripts/coccinelle/locks',
            'filename': 'double_lock.cocci',
            'confidence': 'Moderate',
            'description': "Find double locks. False positives may occur when some paths cannot occur at execution, "
                "due to the values of variables, and when there is an intervening function call that releases the lock."
        },
        {
            'path': 'scripts/coccinelle/locks',
            'filename': 'flags.cocci',
            'confidence': 'High',
            'description': "Find nested lock+irqsave functions that use the same flags variables"
        },
        {
            'path': 'scripts/coccinelle/locks',
            'filename': 'mini_lock.cocci',
            'confidence': 'Moderate',
            'description': "Find missing unlocks. "
                "This semantic match considers the specific case where the unlock is missing from an if branch, "
                "and there is a lock before the if and an unlock after the if. "
                "False positives are due to cases where the if branch represents a case where the function "
                "is supposed to exit with the lock held, or where there is some preceding function call "
                "that releases the lock."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'array_size.cocci',
            'confidence': 'High',
            'description': "Use ARRAY_SIZE instead of dividing sizeof array with sizeof an element "
                "This makes an effort to find cases where ARRAY_SIZE can be used such as where "
                "there is a division of sizeof the array by the sizeof its first element or by any "
                "indexed element or the element type. It replaces the division of the two sizeofs by ARRAY_SIZE."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'badty.cocci',
            'confidence': 'Moderate',
            'description': "Use ARRAY_SIZE instead of dividing sizeof array with sizeof an element "
                "This makes an effort to find cases where the argument to sizeof is wrong in memory "
                "allocation functions by checking the type of the allocated memory when it is a double pointer "
                "and ensuring the sizeof argument takes a pointer to the the memory being allocated. "
                "There are false positives in cases the sizeof argument is not used in constructing the return value. "
                "The result may need some reformatting."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'boolconv.cocci',
            'confidence': 'N.A.',
            'description': "Remove unneeded conversion to bool. "
                "Relational and logical operators evaluate to bool, explicit conversion is overly verbose and unneeded"
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'boolinit.cocci',
            'confidence': 'High',
            'description': "Bool initializations should use true and false. "
                "Bool tests don't need comparisons. "
                "Based on contributions from Joe Perches, Rusty Russell and Bruce W Allan."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'boolreturn.cocci',
            'confidence': 'High',
            'description': "Return statements in functions returning bool should use true/false instead of 1/0."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'bugon.cocci',
            'confidence': 'High',
            'description': "Use BUG_ON instead of a if condition followed by BUG. "
                "This makes an effort to find cases where BUG() follows an if condition "
                "on an expression and replaces the if condition and BUG() with a BUG_ON "
                "having the conditional expression of the if statement as argument."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'cond_no_effect.cocci',
            'confidence': 'Moderate',
            'description': "Find conditions where if and else branch are functionally identical. "
                "There can be false positives in cases where the positional information is used (as with lockdep) "
                "or where the identity is a placeholder for not yet handled cases. "
                "Unfortunately there also seems to be a tendency to use the last if else/else "
                "as a \"default behavior\" - which some might consider a legitimate coding pattern. "
                "From discussion on kernelnewbies though it seems that this is not really an accepted pattern "
                "and if at all it would need to be commented"
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'cstptr.cocci',
            'confidence': 'High',
            'description': "PTR_ERR should be applied before its argument is reassigned, typically to NULL"
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'doubleinit.cocci',
            'confidence': 'Low',
            'description': "Find duplicate field initializations. "
                "This has a high rate of false positives due to #ifdefs, which Coccinelle is not aware of "
                "in a structure initialization."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'ifaddr.cocci',
            'confidence': 'High',
            'description': "The address of a variable or field is likely always to be non-zero."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'ifcol.cocci',
            'confidence': 'Low',
            'description': "Find confusingly indented code in or after an if. An if branch should be indented. "
                "The code following an if should not be indented. Sometimes, code after an if that is indented "
                "is actually intended to be part of the if branch. "
                "This has a high rate of false positives, because Coccinelle's column calculation "
                "does not distinguish between spaces and tabs, so code that is not visually aligned "
                "may be considered to be in the same column."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'irqf_oneshot.cocci',
            'confidence': 'Moderate',
            'description': "Since commit 1c6c69525b40 (genirq: Reject bogus threaded irq requests) "
                "threaded IRQs without a primary handler need to be requested with IRQF_ONESHOT, "
                "otherwise the request will fail. "
                "So pass the IRQF_ONESHOT flag in this case."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'noderef.cocci',
            'confidence': 'High',
            'description': "sizeof when applied to a pointer typed expression gives the size of the pointer"
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'of_table.cocci',
            'confidence': 'Medium',
            'description': "Make sure (of/i2c/platform)_device_id tables are NULL terminated"
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'orplus.cocci',
            'confidence': 'Moderate',
            'description': "Check for constants that are added but are used elsewhere as bitmasks. "
                "The results should be checked manually to ensure that the nonzero bits "
                "in the two constants are actually disjoint."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'returnvar.cocci',
            'confidence': 'Moderate',
            'description': "Remove unneeded variable used to store return value."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'semicolon.cocci',
            'confidence': 'Moderate',
            'description': "Remove unneeded semicolon."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'warn.cocci',
            'confidence': 'High',
            'description': "Use WARN(1,...) rather than printk followed by WARN_ON(1)"
        },
        {
            'path': 'scripts/coccinelle/null',
            'filename': 'badzero.cocci',
            'confidence': 'High',
            'description': "Compare pointer-typed values to NULL rather than 0. "
                "This makes an effort to choose between !x and x == NULL. "
                "!x is used if it has previously been used with the function used to initialize x. "
                "This relies on type information. "
                "More type information can be obtained using the option -all_includes and the option -I "
                "to specify an include path."
        },
        {
            'path': 'scripts/coccinelle/null',
            'filename': 'deref_null.cocci',
            'confidence': 'Moderate',
            'description': "A variable is dereferenced under a NULL test. "
                "Even though it is known to be NULL."
        },
        {
            'path': 'scripts/coccinelle/null',
            'filename': 'eno.cocci',
            'confidence': 'High',
            'description': "The various basic memory allocation functions don't return ERR_PTR"
        },
        {
            'path': 'scripts/coccinelle/null',
            'filename': 'kmerr.cocci',
            'confidence': 'High',
            'description': "This semantic patch looks for kmalloc etc that are not followed by a NULL check. "
                "It only gives a report in the case where there is some error handling code later "
                "in the function, which may be helpful in determining what the error handling code "
                "for the call to kmalloc etc should be."
        },
        {
            'path': 'scripts/coccinelle/tests',
            'filename': 'doublebitand.cocci',
            'confidence': 'Moderate',
            'description': "Find bit operations that include the same argument more than once. "
                "One source of false positives is when the argument performs a side effect. "
                "Another source of false positives is when a neutral value such as 0 for | is used "
                "to indicate no information, to maintain the same structure as other similar expressions"
        },
        {
            'path': 'scripts/coccinelle/tests',
            'filename': 'doubletest.cocci',
            'confidence': 'Moderate',
            'description': "Find &&/|| operations that include the same argument more than once. "
                "A common source of false positives is when the argument performs a side effect."
        },
        {
            'path': 'scripts/coccinelle/tests',
            'filename': 'odd_ptr_err.cocci',
            'confidence': 'High',
            'description': "PTR_ERR should access the value just tested by IS_ERR. "
                "There can be false positives in the patch case, where it is the call to IS_ERR that is wrong."
        },
        {
            'path': 'scripts/coccinelle/tests',
            'filename': 'unsigned_lesser_than_zero.cocci',
            'confidence': 'Average',
            'description': "Unsigned expressions cannot be lesser than zero. "
                "Presence of comparisons 'unsigned (<|<=|>|>=) 0' often indicates a bug, usually wrong type of variable. "
                "To reduce number of false positives following tests have been added: "
                "- parts of range checks are skipped, eg. \"if (u < 0 || u > 15) ...\", developers prefer "
                "to keep such code, "
                "- comparisons \"<= 0\" and \"> 0\" are performed only on results of signed functions/macros,"
                "- hardcoded list of signed functions/macros with always non-negative result is used "
                "to avoid false positives difficult to detect by other ways"
        },
    ]
}

# cocci_linux_4_14_rc6

In [4]:
#print(cocci_linux_4_14_rc6)
len(cocci_linux_4_14_rc6['scripts'])


Out[4]:
59

Creating the DataFrame for cocci_linux_4_14_rc6


In [5]:
data = cocci_linux_4_14_rc6['scripts']
df = DataFrame(data) # , index=cocci_linux_4_14_rc6['scripts']['filename']

#df.count()
#df.describe()
#json.dumps(data)
#print(df.index)
#print()
#print(df.filename)
#print()

#df.head()

In [6]:
# Rearrange columns
cols = ['path', 'filename', 'confidence', 'description']
df = df[cols]

#df

In [7]:
# Notice that text of column "description" as shown above is truncated,
# however the cell inside the DataFrame contains the full value

#df.iloc[0]['description']

In [8]:
# Display full text for column "description"
# See <https://stackoverflow.com/questions/23388810/ipython-notebook-output-cell-is-truncating-contents-of-my-list>
# See <http://pandas.pydata.org/pandas-docs/stable/options.html>

from pandas import DataFrame
from IPython.display import HTML

pd.options.display.max_colwidth = 2000
pd.options.display.max_seq_items = 200

HTML(df.to_html())


Out[8]:
path filename confidence description
0 scripts/coccinelle/api/alloc alloc_cast.cocci High Remove casting the values returned by memory allocation functions like kmalloc, kzalloc, kmem_cache_alloc, kmem_cache_zalloc etc. This makes an effort to find cases of casting of values returned by kmalloc, kzalloc, kcalloc, kmem_cache_alloc, kmem_cache_zalloc, kmem_cache_alloc_node, kmalloc_node and kzalloc_node and removes the casting as it is not required. The result in the patch case may need some reformatting.
1 scripts/coccinelle/api/alloc kzalloc-simple.cocci High Use kzalloc rather than kmalloc followed by memset with 0. This considers some simple cases that are common and easy to validate. Note in particular that there are no ...s in the rule, so all of the matched code has to be contiguous
2 scripts/coccinelle/api/alloc pool_zalloc-simple.cocci N.A. Use *_pool_zalloc rather than *_pool_alloc followed by memset with 0
3 scripts/coccinelle/api d_find_alias.cocci Moderate Make sure calls to d_find_alias() have a corresponding call to dput().
4 scripts/coccinelle/debugfs debugfs_simple_attr.cocci N.A. Use DEFINE_DEBUGFS_ATTRIBUTE rather than DEFINE_SIMPLE_ATTRIBUTE for debugfs files. Rationale: DEFINE_SIMPLE_ATTRIBUTE + debugfs_create_file() imposes some significant overhead as compared to DEFINE_DEBUGFS_ATTRIBUTE + debugfs_create_file_unsafe().
5 scripts/coccinelle/api drm-get-put.cocci High Use drm_*_get() and drm_*_put() helpers instead of drm_*_reference() and drm_*_unreference() helpers.
6 scripts/coccinelle/api err_cast.cocci High Use ERR_CAST inlined function instead of ERR_PTR(PTR_ERR(...))
7 scripts/coccinelle/api kstrdup.cocci High Use kstrdup rather than duplicating its implementation
8 scripts/coccinelle/api memdup.cocci High Use kmemdup rather than duplicating its implementation
9 scripts/coccinelle/api memdup_user.cocci High Use memdup_user rather than duplicating its implementation. This is a little bit restricted to reduce false positives
10 scripts/coccinelle/api platform_no_drv_owner.cocci High Remove .owner field if calls are used which set it automatically
11 scripts/coccinelle/api pm_runtime.cocci Medium Make sure pm_runtime_* calls does not use unnecessary IS_ERR_VALUE
12 scripts/coccinelle/api ptr_ret.cocci High Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR
13 scripts/coccinelle/api resource_size.cocci High Use resource_size function on resource object instead of explicit computation.
14 scripts/coccinelle/api setup_timer.cocci High Use setup_timer function instead of initializing timer with the function and data fields
15 scripts/coccinelle/api simple_open.cocci High Remove an open coded simple_open() function and replace file operations references to the function with simple_open() instead.
16 scripts/coccinelle/api vma_pages.cocci High Use vma_pages function on vma object instead of explicit computation.
17 scripts/coccinelle/free clk_put.cocci Medium Find missing clk_puts. This only signals a missing clk_put when there is a clk_put later in the same function. False positives can be due to loops.
18 scripts/coccinelle/free devm_free.cocci Moderate Find uses of standard freeing functions on values allocated using devm_ functions. Values allocated using the devm_functions are freed when the device is detached, and thus the use of the standard freeing function would cause a double free. See Documentation/driver-model/devres.txt for more information. A difficulty of detecting this problem is that the standard freeing function might be called from a different function than the one containing the allocation function. It is thus necessary to make the connection between the allocation function and the freeing function. Here this is done using the specific argument text, which is prone to false positives. There is no rule for the request_region and request_mem_region variants because this heuristic seems to be a bit less reliable in these cases.
19 scripts/coccinelle/free ifnullfree.cocci N.A. NULL check before some freeing functions is not needed. Based on checkpatch warning "kfree(NULL) is safe this check is probably not required" and kfreeaddr.cocci
20 scripts/coccinelle/free iounmap.cocci Moderate Find missing iounmaps. This only signals a missing iounmap when there is an iounmap later in the same function. False positives can be due to loops.
21 scripts/coccinelle/free kfree.cocci Moderate Find a use after free. Values of variables may imply that some execution paths are not possible, resulting in false positives. Another source of false positives are macros such as SCTP_DBG_OBJCNT_DEC that do not actually evaluate their argument
22 scripts/coccinelle/free kfreeaddr.cocci High Free of a structure field
23 scripts/coccinelle/free pci_free_consistent.cocci Moderate Find missing pci_free_consistent for every pci_alloc_consistent.
24 scripts/coccinelle/free device_node_continue.cocci High Device node iterators put the previous value of the index variable, so an explicit put causes a double put.
25 scripts/coccinelle/iterators fen.cocci High These iterators only exit normally when the loop cursor is NULL, so there is no point to call of_node_put on the final value.
26 scripts/coccinelle/iterators ifnull.cocci Moderate Many iterators have the property that the first argument is always bound to a real list element, never NULL. False positives arise for some iterators that do not have this property, or in cases when the loop cursor is reassigned. The latter should only happen when the matched code is on the way to a loop exit (break, goto, or return).
27 scripts/coccinelle/iterators list_entry_update.cocci High list_for_each_entry uses its first argument to get from one element of the list to the next, so it is usually not a good idea to reassign it. The first rule finds such a reassignment and the second rule checks that there is a path from the reassignment back to the top of the loop.
28 scripts/coccinelle/iterators use_after_iter.cocci Moderate If list_for_each_entry, etc complete a traversal of the list, the iterator variable ends up pointing to an address at an offset from the list head, and not a meaningful structure. Thus this value should not be used after the end of the iterator. False positives arise when there is a goto in the iterator and the reported reference is at the label of this goto. Some flag tests may also cause a report to be a false positive.
29 scripts/coccinelle/locks call_kern.cocci Moderate Find functions that refer to GFP_KERNEL but are called with locks held. The proposed change of converting the GFP_KERNEL is not necessarily the correct one. It may be desired to unlock the lock, or to not call the function under the lock in the first place.
30 scripts/coccinelle/locks double_lock.cocci Moderate Find double locks. False positives may occur when some paths cannot occur at execution, due to the values of variables, and when there is an intervening function call that releases the lock.
31 scripts/coccinelle/locks flags.cocci High Find nested lock+irqsave functions that use the same flags variables
32 scripts/coccinelle/locks mini_lock.cocci Moderate Find missing unlocks. This semantic match considers the specific case where the unlock is missing from an if branch, and there is a lock before the if and an unlock after the if. False positives are due to cases where the if branch represents a case where the function is supposed to exit with the lock held, or where there is some preceding function call that releases the lock.
33 scripts/coccinelle/misc array_size.cocci High Use ARRAY_SIZE instead of dividing sizeof array with sizeof an element This makes an effort to find cases where ARRAY_SIZE can be used such as where there is a division of sizeof the array by the sizeof its first element or by any indexed element or the element type. It replaces the division of the two sizeofs by ARRAY_SIZE.
34 scripts/coccinelle/misc badty.cocci Moderate Use ARRAY_SIZE instead of dividing sizeof array with sizeof an element This makes an effort to find cases where the argument to sizeof is wrong in memory allocation functions by checking the type of the allocated memory when it is a double pointer and ensuring the sizeof argument takes a pointer to the the memory being allocated. There are false positives in cases the sizeof argument is not used in constructing the return value. The result may need some reformatting.
35 scripts/coccinelle/misc boolconv.cocci N.A. Remove unneeded conversion to bool. Relational and logical operators evaluate to bool, explicit conversion is overly verbose and unneeded
36 scripts/coccinelle/misc boolinit.cocci High Bool initializations should use true and false. Bool tests don't need comparisons. Based on contributions from Joe Perches, Rusty Russell and Bruce W Allan.
37 scripts/coccinelle/misc boolreturn.cocci High Return statements in functions returning bool should use true/false instead of 1/0.
38 scripts/coccinelle/misc bugon.cocci High Use BUG_ON instead of a if condition followed by BUG. This makes an effort to find cases where BUG() follows an if condition on an expression and replaces the if condition and BUG() with a BUG_ON having the conditional expression of the if statement as argument.
39 scripts/coccinelle/misc cond_no_effect.cocci Moderate Find conditions where if and else branch are functionally identical. There can be false positives in cases where the positional information is used (as with lockdep) or where the identity is a placeholder for not yet handled cases. Unfortunately there also seems to be a tendency to use the last if else/else as a "default behavior" - which some might consider a legitimate coding pattern. From discussion on kernelnewbies though it seems that this is not really an accepted pattern and if at all it would need to be commented
40 scripts/coccinelle/misc cstptr.cocci High PTR_ERR should be applied before its argument is reassigned, typically to NULL
41 scripts/coccinelle/misc doubleinit.cocci Low Find duplicate field initializations. This has a high rate of false positives due to #ifdefs, which Coccinelle is not aware of in a structure initialization.
42 scripts/coccinelle/misc ifaddr.cocci High The address of a variable or field is likely always to be non-zero.
43 scripts/coccinelle/misc ifcol.cocci Low Find confusingly indented code in or after an if. An if branch should be indented. The code following an if should not be indented. Sometimes, code after an if that is indented is actually intended to be part of the if branch. This has a high rate of false positives, because Coccinelle's column calculation does not distinguish between spaces and tabs, so code that is not visually aligned may be considered to be in the same column.
44 scripts/coccinelle/misc irqf_oneshot.cocci Moderate Since commit 1c6c69525b40 (genirq: Reject bogus threaded irq requests) threaded IRQs without a primary handler need to be requested with IRQF_ONESHOT, otherwise the request will fail. So pass the IRQF_ONESHOT flag in this case.
45 scripts/coccinelle/misc noderef.cocci High sizeof when applied to a pointer typed expression gives the size of the pointer
46 scripts/coccinelle/misc of_table.cocci Medium Make sure (of/i2c/platform)_device_id tables are NULL terminated
47 scripts/coccinelle/misc orplus.cocci Moderate Check for constants that are added but are used elsewhere as bitmasks. The results should be checked manually to ensure that the nonzero bits in the two constants are actually disjoint.
48 scripts/coccinelle/misc returnvar.cocci Moderate Remove unneeded variable used to store return value.
49 scripts/coccinelle/misc semicolon.cocci Moderate Remove unneeded semicolon.
50 scripts/coccinelle/misc warn.cocci High Use WARN(1,...) rather than printk followed by WARN_ON(1)
51 scripts/coccinelle/null badzero.cocci High Compare pointer-typed values to NULL rather than 0. This makes an effort to choose between !x and x == NULL. !x is used if it has previously been used with the function used to initialize x. This relies on type information. More type information can be obtained using the option -all_includes and the option -I to specify an include path.
52 scripts/coccinelle/null deref_null.cocci Moderate A variable is dereferenced under a NULL test. Even though it is known to be NULL.
53 scripts/coccinelle/null eno.cocci High The various basic memory allocation functions don't return ERR_PTR
54 scripts/coccinelle/null kmerr.cocci High This semantic patch looks for kmalloc etc that are not followed by a NULL check. It only gives a report in the case where there is some error handling code later in the function, which may be helpful in determining what the error handling code for the call to kmalloc etc should be.
55 scripts/coccinelle/tests doublebitand.cocci Moderate Find bit operations that include the same argument more than once. One source of false positives is when the argument performs a side effect. Another source of false positives is when a neutral value such as 0 for | is used to indicate no information, to maintain the same structure as other similar expressions
56 scripts/coccinelle/tests doubletest.cocci Moderate Find &&/|| operations that include the same argument more than once. A common source of false positives is when the argument performs a side effect.
57 scripts/coccinelle/tests odd_ptr_err.cocci High PTR_ERR should access the value just tested by IS_ERR. There can be false positives in the patch case, where it is the call to IS_ERR that is wrong.
58 scripts/coccinelle/tests unsigned_lesser_than_zero.cocci Average Unsigned expressions cannot be lesser than zero. Presence of comparisons 'unsigned (<|<=|>|>=) 0' often indicates a bug, usually wrong type of variable. To reduce number of false positives following tests have been added: - parts of range checks are skipped, eg. "if (u < 0 || u > 15) ...", developers prefer to keep such code, - comparisons "<= 0" and "> 0" are performed only on results of signed functions/macros,- hardcoded list of signed functions/macros with always non-negative result is used to avoid false positives difficult to detect by other ways

In [9]:
# Export the dataframe to an Excel file
#df.to_excel('test.xls')

Creating DataFrames for instrument-coccicheck reports


In [10]:
# Report against linux-4.14-rc6 full-tree

report_linux_4_14_rc6_fulltree = {
    'info': {
        'note': """Report against linux-4.14-rc6 full-tree""",
        'script_start': 'Thu Oct  5 13:11:29 CEST 2017',
        'src_sha': 'd81fa669e3de7eb8a631d7d95dac5fbcb2bf9d4e',
    },
    'statistics': [
        {'filename': 'alloc_cast.cocci',                'L':  13, 'W':   0, 'E':   0},
        {'filename': 'kzalloc-simple.cocci',            'L':   4, 'W':   4, 'E':   0},
        {'filename': 'pool_zalloc-simple.cocci',        'L':  22, 'W':  22, 'E':   0}, 
        {'filename': 'd_find_alias.cocci',              'L':   0, 'W':   0, 'E':   0},
        {'filename': 'debugfs_simple_attr.cocci',       'L':  83, 'W':  83, 'E':   0},
        {'filename': 'drm-get-put.cocci',               'L':  48, 'W':  48, 'E':   0},
        {'filename': 'err_cast.cocci',                  'L':   2, 'W':   2, 'E':   0},
        {'filename': 'kstrdup.cocci',                   'L':   0, 'W':   0, 'E':   0},
        {'filename': 'memdup.cocci',                    'L':   0, 'W':   0, 'E':   0},
        {'filename': 'memdup_user.cocci',               'L':   9, 'W':   9, 'E':   0},
        {'filename': 'platform_no_drv_owner.cocci',     'L':  20, 'W':   0, 'E':   0},
        {'filename': 'pm_runtime.cocci',                'L':   0, 'W':   0, 'E':   0},
        {'filename': 'ptr_ret.cocci',                   'L':  55, 'W':  55, 'E':   0},
        {'filename': 'resource_size.cocci',             'L':  10, 'W':  10, 'E':   0},
        {'filename': 'setup_timer.cocci',               'L': 222, 'W':   0, 'E':   0},
        {'filename': 'simple_open.cocci',               'L':   1, 'W':   1, 'E':   0},
        {'filename': 'vma_pages.cocci',                 'L':   7, 'W':   7, 'E':   0},
        {'filename': 'clk_put.cocci',                   'L':   0, 'W':   0, 'E':   0},
        {'filename': 'devm_free.cocci',                 'L':  21, 'W':  21, 'E':   0},
        {'filename': 'ifnullfree.cocci',                'L': 124, 'W': 124, 'E':   0},
        {'filename': 'iounmap.cocci',                   'L':   5, 'W':   0, 'E':   5},
        {'filename': 'kfree.cocci',                     'L':  63, 'W':   0, 'E':  63},
        {'filename': 'kfreeaddr.cocci',                 'L':   0, 'W':   0, 'E':   0},
        {'filename': 'pci_free_consistent.cocci',       'L':   7, 'W':   0, 'E':   7},
        {'filename': 'device_node_continue.cocci',      'L':   3, 'W':   0, 'E':   3},
        {'filename': 'fen.cocci',                       'L':   2, 'W':   0, 'E':   2},
        {'filename': 'ifnull.cocci',                    'L':  27, 'W':   0, 'E':  27},
        {'filename': 'list_entry_update.cocci',         'L':   4, 'W':   0, 'E':   0},
        {'filename': 'use_after_iter.cocci',            'L':  27, 'W':   0, 'E':  27},
        {'filename': 'call_kern.cocci',                 'L':   6, 'W':   0, 'E':   6},
        {'filename': 'double_lock.cocci',               'L':  34, 'W':   0, 'E':   0},
        {'filename': 'flags.cocci',                     'L':   1, 'W':   0, 'E':   1},
        {'filename': 'mini_lock.cocci',                 'L': 288, 'W':   0, 'E':   0},
        {'filename': 'array_size.cocci',                'L':   8, 'W':   8, 'E':   0},
        {'filename': 'badty.cocci',                     'L':   4, 'W':   4, 'E':   0},
        {'filename': 'boolconv.cocci',                  'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'boolinit.cocci',                  'L':   1775, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'boolreturn.cocci',                'L': 113, 'W': 113, 'E':   0},
        {'filename': 'bugon.cocci',                     'L': 608, 'W': 304, 'E':   0},
        {'filename': 'cond_no_effect.cocci',            'L':  95, 'W':  95, 'E':   0},
        {'filename': 'cstptr.cocci',                    'L':   4, 'W':   0, 'E':   4},
        {'filename': 'doubleinit.cocci',                'L': 159, 'W':   0, 'E':   0},
        {'filename': 'ifaddr.cocci',                    'L':  13, 'W':   0, 'E':  13},
        {'filename': 'ifcol.cocci',                     'L': 110, 'W':   0, 'E':   0},
        {'filename': 'irqf_oneshot.cocci',              'L':  52, 'W':   0, 'E':  52},
        {'filename': 'noderef.cocci',                   'L':   6, 'W':   0, 'E':   6},
        {'filename': 'of_table.cocci',                  'L':   3, 'W':   0, 'E':   0},
        {'filename': 'orplus.cocci',                    'L': 223, 'W': 223, 'E':   0},
        {'filename': 'returnvar.cocci',                 'L': 254, 'W':   0, 'E':   0},
        {'filename': 'semicolon.cocci',                 'L': 327, 'W':   0, 'E':   0},
        {'filename': 'warn.cocci',                      'L':  18, 'W':   0, 'E':   0},
        {'filename': 'badzero.cocci',                   'L': 119, 'W': 119, 'E':   0},
        {'filename': 'deref_null.cocci',                'L':  32, 'W':   0, 'E':  32},
        {'filename': 'eno.cocci',                       'L':   1, 'W':   0, 'E':   1},
        {'filename': 'kmerr.cocci',                     'L':  13, 'W':   0, 'E':   0},
        {'filename': 'doublebitand.cocci',              'L':  79, 'W':   0, 'E':   0},
        {'filename': 'doubletest.cocci',                'L':  94, 'W':   0, 'E':   0},
        {'filename': 'odd_ptr_err.cocci',               'L':   4, 'W':   0, 'E':   0},
        {'filename': 'unsigned_lesser_than_zero.cocci', 'L':  18, 'W':  18, 'E':   0},
    ]
}

In [11]:
data2_full = DataFrame(report_linux_4_14_rc6_fulltree['statistics'])
data2_full = data2_full[['filename','L', 'W', 'E']]

data2_full


Out[11]:
filename L W E
0 alloc_cast.cocci 13.0 0.0 0.0
1 kzalloc-simple.cocci 4.0 4.0 0.0
2 pool_zalloc-simple.cocci 22.0 22.0 0.0
3 d_find_alias.cocci 0.0 0.0 0.0
4 debugfs_simple_attr.cocci 83.0 83.0 0.0
5 drm-get-put.cocci 48.0 48.0 0.0
6 err_cast.cocci 2.0 2.0 0.0
7 kstrdup.cocci 0.0 0.0 0.0
8 memdup.cocci 0.0 0.0 0.0
9 memdup_user.cocci 9.0 9.0 0.0
10 platform_no_drv_owner.cocci 20.0 0.0 0.0
11 pm_runtime.cocci 0.0 0.0 0.0
12 ptr_ret.cocci 55.0 55.0 0.0
13 resource_size.cocci 10.0 10.0 0.0
14 setup_timer.cocci 222.0 0.0 0.0
15 simple_open.cocci 1.0 1.0 0.0
16 vma_pages.cocci 7.0 7.0 0.0
17 clk_put.cocci 0.0 0.0 0.0
18 devm_free.cocci 21.0 21.0 0.0
19 ifnullfree.cocci 124.0 124.0 0.0
20 iounmap.cocci 5.0 0.0 5.0
21 kfree.cocci 63.0 0.0 63.0
22 kfreeaddr.cocci 0.0 0.0 0.0
23 pci_free_consistent.cocci 7.0 0.0 7.0
24 device_node_continue.cocci 3.0 0.0 3.0
25 fen.cocci 2.0 0.0 2.0
26 ifnull.cocci 27.0 0.0 27.0
27 list_entry_update.cocci 4.0 0.0 0.0
28 use_after_iter.cocci 27.0 0.0 27.0
29 call_kern.cocci 6.0 0.0 6.0
30 double_lock.cocci 34.0 0.0 0.0
31 flags.cocci 1.0 0.0 1.0
32 mini_lock.cocci 288.0 0.0 0.0
33 array_size.cocci 8.0 8.0 0.0
34 badty.cocci 4.0 4.0 0.0
35 boolconv.cocci NaN NaN NaN
36 boolinit.cocci 1775.0 NaN NaN
37 boolreturn.cocci 113.0 113.0 0.0
38 bugon.cocci 608.0 304.0 0.0
39 cond_no_effect.cocci 95.0 95.0 0.0
40 cstptr.cocci 4.0 0.0 4.0
41 doubleinit.cocci 159.0 0.0 0.0
42 ifaddr.cocci 13.0 0.0 13.0
43 ifcol.cocci 110.0 0.0 0.0
44 irqf_oneshot.cocci 52.0 0.0 52.0
45 noderef.cocci 6.0 0.0 6.0
46 of_table.cocci 3.0 0.0 0.0
47 orplus.cocci 223.0 223.0 0.0
48 returnvar.cocci 254.0 0.0 0.0
49 semicolon.cocci 327.0 0.0 0.0
50 warn.cocci 18.0 0.0 0.0
51 badzero.cocci 119.0 119.0 0.0
52 deref_null.cocci 32.0 0.0 32.0
53 eno.cocci 1.0 0.0 1.0
54 kmerr.cocci 13.0 0.0 0.0
55 doublebitand.cocci 79.0 0.0 0.0
56 doubletest.cocci 94.0 0.0 0.0
57 odd_ptr_err.cocci 4.0 0.0 0.0
58 unsigned_lesser_than_zero.cocci 18.0 18.0 0.0

In [12]:
#data2_full.plot()
#data2_full[['L', 'W', 'E']].plot()
#data2_full[['filename','L', 'W', 'E']].plot(kind='bar')

#len(data2_full['filename'].unique())

In [13]:
# Report against linux-4.4.50 minimized-tree

report_linux_4_4_50_minimizedtree = {
    'info': {
        'note': """Report against linux-4.4.50 minimzed-tree""",
        'script_start': 'Mon Oct 23 16:29:52 IST 2017',
        'cocci_sha': '9e66317d3c92ddaab330c125dfe9d06eee268aff',
        'src_sha': '90dcab23bbc81fbfa25dfdb91d4ce974a69bd210',
    },
    'statistics': [
        {'filename': 'alloc_cast.cocci',                'L':   2, 'W':   0, 'E':   0},
        {'filename': 'kzalloc-simple.cocci',            'L':   0, 'W':   0, 'E':   0},
        {'filename': 'pool_zalloc-simple.cocci',        'L':   5, 'W':   0, 'E':   0}, 
        {'filename': 'd_find_alias.cocci',              'L':   0, 'W':   0, 'E':   0},
        {'filename': 'debugfs_simple_attr.cocci',       'L':   0, 'W':   0, 'E':   0},
        {'filename': 'drm-get-put.cocci',               'L':   0, 'W':   0, 'E':   0},
        {'filename': 'err_cast.cocci',                  'L':   0, 'W':   0, 'E':   0},
        {'filename': 'kstrdup.cocci',                   'L':   0, 'W':   0, 'E':   0},
        {'filename': 'memdup.cocci',                    'L':   0, 'W':   0, 'E':   0},
        {'filename': 'memdup_user.cocci',               'L':   4, 'W':   4, 'E':   0},
        {'filename': 'platform_no_drv_owner.cocci',     'L':   0, 'W':   0, 'E':   0},
        {'filename': 'pm_runtime.cocci',                'L':   0, 'W':   0, 'E':   0},
        {'filename': 'ptr_ret.cocci',                   'L':   2, 'W':   2, 'E':   0},
        {'filename': 'resource_size.cocci',             'L':   0, 'W':   0, 'E':   0},
        {'filename': 'setup_timer.cocci',               'L':  11, 'W':   0, 'E':   0},
        {'filename': 'simple_open.cocci',               'L':   0, 'W':   0, 'E':   0},
        {'filename': 'vma_pages.cocci',                 'L':   2, 'W':   2, 'E':   0},
        {'filename': 'clk_put.cocci',                   'L':   0, 'W':   0, 'E':   0},
        {'filename': 'devm_free.cocci',                 'L':   0, 'W':   0, 'E':   0},
        {'filename': 'ifnullfree.cocci',                'L':  14, 'W':  14, 'E':   0},
        {'filename': 'iounmap.cocci',                   'L':   1, 'W':   0, 'E':   1},
        {'filename': 'kfree.cocci',                     'L':   2, 'W':   0, 'E':   2},
        {'filename': 'kfreeaddr.cocci',                 'L':   0, 'W':   0, 'E':   0},
        {'filename': 'pci_free_consistent.cocci',       'L':   0, 'W':   0, 'E':   0},
        {'filename': 'device_node_continue.cocci',      'L':   0, 'W':   0, 'E':   0},
        {'filename': 'fen.cocci',                       'L':   0, 'W':   0, 'E':   0},
        {'filename': 'ifnull.cocci',                    'L':   5, 'W':   0, 'E':   5},
        {'filename': 'list_entry_update.cocci',         'L':   1, 'W':   0, 'E':   0},
        {'filename': 'use_after_iter.cocci',            'L':   1, 'W':   0, 'E':   1},
        {'filename': 'call_kern.cocci',                 'L':   3, 'W':   0, 'E':   3},
        {'filename': 'double_lock.cocci',               'L':  11, 'W':   0, 'E':   0},
        {'filename': 'flags.cocci',                     'L':   0, 'W':   0, 'E':   0},
        {'filename': 'mini_lock.cocci',                 'L':  45, 'W':   0, 'E':   0},
        {'filename': 'array_size.cocci',                'L':   0, 'W':   0, 'E':   0},
        {'filename': 'badty.cocci',                     'L':   0, 'W':   0, 'E':   0},
        {'filename': 'boolconv.cocci',                  'L':   2, 'W':   2, 'E':   0},
        {'filename': 'boolinit.cocci',                  'L':  37, 'W':  37, 'E':   0},
        {'filename': 'boolreturn.cocci',                'L':   4, 'W':   4, 'E':   0},
        {'filename': 'bugon.cocci',                     'L':  92, 'W':  46, 'E':   0},
        {'filename': 'cond_no_effect.cocci',            'L':   1, 'W':   1, 'E':   0},
        {'filename': 'cstptr.cocci',                    'L':   1, 'W':   0, 'E':   1},
        {'filename': 'doubleinit.cocci',                'L':   0, 'W':   0, 'E':   0},
        {'filename': 'ifaddr.cocci',                    'L':   0, 'W':   0, 'E':   0},
        {'filename': 'ifcol.cocci',                     'L':   3, 'W':   0, 'E':   0},
        {'filename': 'irqf_oneshot.cocci',              'L':   2, 'W':   0, 'E':   2},
        {'filename': 'noderef.cocci',                   'L':   1, 'W':   0, 'E':   1},
        {'filename': 'of_table.cocci',                  'L':   0, 'W':   0, 'E':   0},
        {'filename': 'orplus.cocci',                    'L':   5, 'W':   5, 'E':   0},
        {'filename': 'returnvar.cocci',                 'L':   8, 'W':   0, 'E':   0},
        {'filename': 'semicolon.cocci',                 'L':   4, 'W':   0, 'E':   0},
        {'filename': 'warn.cocci',                      'L':   5, 'W':   0, 'E':   0},
        {'filename': 'badzero.cocci',                   'L':   0, 'W':   0, 'E':   0},
        {'filename': 'deref_null.cocci',                'L':   0, 'W':   0, 'E':   0},
        {'filename': 'eno.cocci',                       'L':   0, 'W':   0, 'E':   0},
        {'filename': 'kmerr.cocci',                     'L':   0, 'W':   0, 'E':   0},
        {'filename': 'doublebitand.cocci',              'L':  20, 'W':   0, 'E':   0},
        {'filename': 'doubletest.cocci',                'L':   2, 'W':   0, 'E':   0},
        {'filename': 'odd_ptr_err.cocci',               'L':   0, 'W':   0, 'E':   0},
        {'filename': 'unsigned_lesser_than_zero.cocci', 'L':   0, 'W':   0, 'E':   0},
    ]
}

In [14]:
data3_minimized = DataFrame(report_linux_4_4_50_minimizedtree['statistics'])
data3_minimized = data3_minimized[['filename','L', 'W', 'E']]

data3_minimized


Out[14]:
filename L W E
0 alloc_cast.cocci 2 0 0
1 kzalloc-simple.cocci 0 0 0
2 pool_zalloc-simple.cocci 5 0 0
3 d_find_alias.cocci 0 0 0
4 debugfs_simple_attr.cocci 0 0 0
5 drm-get-put.cocci 0 0 0
6 err_cast.cocci 0 0 0
7 kstrdup.cocci 0 0 0
8 memdup.cocci 0 0 0
9 memdup_user.cocci 4 4 0
10 platform_no_drv_owner.cocci 0 0 0
11 pm_runtime.cocci 0 0 0
12 ptr_ret.cocci 2 2 0
13 resource_size.cocci 0 0 0
14 setup_timer.cocci 11 0 0
15 simple_open.cocci 0 0 0
16 vma_pages.cocci 2 2 0
17 clk_put.cocci 0 0 0
18 devm_free.cocci 0 0 0
19 ifnullfree.cocci 14 14 0
20 iounmap.cocci 1 0 1
21 kfree.cocci 2 0 2
22 kfreeaddr.cocci 0 0 0
23 pci_free_consistent.cocci 0 0 0
24 device_node_continue.cocci 0 0 0
25 fen.cocci 0 0 0
26 ifnull.cocci 5 0 5
27 list_entry_update.cocci 1 0 0
28 use_after_iter.cocci 1 0 1
29 call_kern.cocci 3 0 3
30 double_lock.cocci 11 0 0
31 flags.cocci 0 0 0
32 mini_lock.cocci 45 0 0
33 array_size.cocci 0 0 0
34 badty.cocci 0 0 0
35 boolconv.cocci 2 2 0
36 boolinit.cocci 37 37 0
37 boolreturn.cocci 4 4 0
38 bugon.cocci 92 46 0
39 cond_no_effect.cocci 1 1 0
40 cstptr.cocci 1 0 1
41 doubleinit.cocci 0 0 0
42 ifaddr.cocci 0 0 0
43 ifcol.cocci 3 0 0
44 irqf_oneshot.cocci 2 0 2
45 noderef.cocci 1 0 1
46 of_table.cocci 0 0 0
47 orplus.cocci 5 5 0
48 returnvar.cocci 8 0 0
49 semicolon.cocci 4 0 0
50 warn.cocci 5 0 0
51 badzero.cocci 0 0 0
52 deref_null.cocci 0 0 0
53 eno.cocci 0 0 0
54 kmerr.cocci 0 0 0
55 doublebitand.cocci 20 0 0
56 doubletest.cocci 2 0 0
57 odd_ptr_err.cocci 0 0 0
58 unsigned_lesser_than_zero.cocci 0 0 0

Assigning estimated safety relevance to cocci scripts


In [15]:
my_safetyrel_cocci = {
    'info': {
        # TODO
    },
    # rel: Safety Relevance (1=Highest, 9=lowest)
    'values': [
        {'filename': 'alloc_cast.cocci',                'rel': 5, 'notes': ''},
        {'filename': 'kzalloc-simple.cocci',            'rel': 5, 'notes': ''},
        {'filename': 'pool_zalloc-simple.cocci',        'rel': 8, 'notes': ''},
        {'filename': 'd_find_alias.cocci',              'rel': 3, 'notes': ''},
        {'filename': 'debugfs_simple_attr.cocci',       'rel': 6, 'notes': ''},
        {'filename': 'drm-get-put.cocci',               'rel': 6, 'notes': ''},
        {'filename': 'err_cast.cocci',                  'rel': 5, 'notes': ''},
        {'filename': 'kstrdup.cocci',                   'rel': 4, 'notes': ''},
        {'filename': 'memdup.cocci',                    'rel': 4, 'notes': ''},
        {'filename': 'memdup_user.cocci',               'rel': 4, 'notes': ''},
        {'filename': 'platform_no_drv_owner.cocci',     'rel': 5, 'notes': ''},
        {'filename': 'pm_runtime.cocci',                'rel': 4, 'notes': ''},
        {'filename': 'ptr_ret.cocci',                   'rel': 5, 'notes': ''},
        {'filename': 'resource_size.cocci',             'rel': 4, 'notes': ''},
        {'filename': 'setup_timer.cocci',               'rel': 6, 'notes': ''},
        {'filename': 'simple_open.cocci',               'rel': 4, 'notes': ''},
        {'filename': 'vma_pages.cocci',                 'rel': 4, 'notes': ''},
        {'filename': 'clk_put.cocci',                   'rel': 4, 'notes': ''},
        {'filename': 'devm_free.cocci',                 'rel': 2, 'notes': 'Beware of false positives'},
        {'filename': 'ifnullfree.cocci',                'rel': 7, 'notes': ''},
        {'filename': 'iounmap.cocci',                   'rel': 4, 'notes': 'Beware of false positives'},
        {'filename': 'kfree.cocci',                     'rel': 2, 'notes': 'Beware of false positives'},
        {'filename': 'kfreeaddr.cocci',                 'rel': 1, 'notes': ''},
        {'filename': 'pci_free_consistent.cocci',       'rel': 4, 'notes': ''},
        {'filename': 'device_node_continue.cocci',      'rel': 3, 'notes': ''},
        {'filename': 'fen.cocci',                       'rel': 5, 'notes': ''},
        {'filename': 'ifnull.cocci',                    'rel': 5, 'notes': 'Beware of false positives'},
        {'filename': 'list_entry_update.cocci',         'rel': 4, 'notes': ''},
        {'filename': 'use_after_iter.cocci',            'rel': 3, 'notes': 'Beware of false positives'},
        {'filename': 'call_kern.cocci',                 'rel': 4, 'notes': ''},
        {'filename': 'double_lock.cocci',               'rel': 5, 'notes': 'Beware of false positives'},
        {'filename': 'flags.cocci',                     'rel': 2, 'notes': ''},
        {'filename': 'mini_lock.cocci',                 'rel': 6, 'notes': 'Beware of false positives'},
        {'filename': 'array_size.cocci',                'rel': 4, 'notes': ''},
        {'filename': 'badty.cocci',                     'rel': 4, 'notes': 'Beware of false positives'},
        {'filename': 'boolconv.cocci',                  'rel': 9, 'notes': 'Cleanup code only'},
        {'filename': 'boolinit.cocci',                  'rel': 8, 'notes': ''},
        {'filename': 'boolreturn.cocci',                'rel': 6, 'notes': 'Does not affect code behaviour'},
        {'filename': 'bugon.cocci',                     'rel': 8, 'notes': ''},
        {'filename': 'cond_no_effect.cocci',            'rel': 4, 'notes': 'Developed by Nicholas Mc Guire, OSADL'},
        {'filename': 'cstptr.cocci',                    'rel': 1, 'notes': ''},
        {'filename': 'doubleinit.cocci',                'rel': 7, 'notes': 'High rate of false positives'},
        {'filename': 'ifaddr.cocci',                    'rel': 1, 'notes': ''},
        {'filename': 'ifcol.cocci',                     'rel': 7, 'notes': 'High rate of false positives'},
        {'filename': 'irqf_oneshot.cocci',              'rel': 6, 'notes': ''},
        {'filename': 'noderef.cocci',                   'rel': 2, 'notes': ''},
        {'filename': 'of_table.cocci',                  'rel': 2, 'notes': ''},
        {'filename': 'orplus.cocci',                    'rel': 4, 'notes': 'Beware of false positives'},
        {'filename': 'returnvar.cocci',                 'rel': 7, 'notes': ''},
        {'filename': 'semicolon.cocci',                 'rel': 8, 'notes': ''},
        {'filename': 'warn.cocci',                      'rel': 4, 'notes': ''},
        {'filename': 'badzero.cocci',                   'rel': 6, 'notes': ''},
        {'filename': 'deref_null.cocci',                'rel': 4, 'notes': 'Beware of false positives'},
        {'filename': 'eno.cocci',                       'rel': 2, 'notes': ''},
        {'filename': 'kmerr.cocci',                     'rel': 2, 'notes': ''},
        {'filename': 'doublebitand.cocci',              'rel': 7, 'notes': 'Beware of false positives'},
        {'filename': 'doubletest.cocci',                'rel': 7, 'notes': 'Beware of false positives'},
        {'filename': 'odd_ptr_err.cocci',               'rel': 4, 'notes': ''},
        {'filename': 'unsigned_lesser_than_zero.cocci', 'rel': 3, 'notes': ''},
        # TODO: Complete with all the 59 entries
    ]
}

In [16]:
data5_relevance = DataFrame(my_safetyrel_cocci['values'])
data5_relevance = data5_relevance[['filename','rel', 'notes']]

data5_relevance


Out[16]:
filename rel notes
0 alloc_cast.cocci 5
1 kzalloc-simple.cocci 5
2 pool_zalloc-simple.cocci 8
3 d_find_alias.cocci 3
4 debugfs_simple_attr.cocci 6
5 drm-get-put.cocci 6
6 err_cast.cocci 5
7 kstrdup.cocci 4
8 memdup.cocci 4
9 memdup_user.cocci 4
10 platform_no_drv_owner.cocci 5
11 pm_runtime.cocci 4
12 ptr_ret.cocci 5
13 resource_size.cocci 4
14 setup_timer.cocci 6
15 simple_open.cocci 4
16 vma_pages.cocci 4
17 clk_put.cocci 4
18 devm_free.cocci 2 Beware of false positives
19 ifnullfree.cocci 7
20 iounmap.cocci 4 Beware of false positives
21 kfree.cocci 2 Beware of false positives
22 kfreeaddr.cocci 1
23 pci_free_consistent.cocci 4
24 device_node_continue.cocci 3
25 fen.cocci 5
26 ifnull.cocci 5 Beware of false positives
27 list_entry_update.cocci 4
28 use_after_iter.cocci 3 Beware of false positives
29 call_kern.cocci 4
30 double_lock.cocci 5 Beware of false positives
31 flags.cocci 2
32 mini_lock.cocci 6 Beware of false positives
33 array_size.cocci 4
34 badty.cocci 4 Beware of false positives
35 boolconv.cocci 9 Cleanup code only
36 boolinit.cocci 8
37 boolreturn.cocci 6 Does not affect code behaviour
38 bugon.cocci 8
39 cond_no_effect.cocci 4 Developed by Nicholas Mc Guire, OSADL
40 cstptr.cocci 1
41 doubleinit.cocci 7 High rate of false positives
42 ifaddr.cocci 1
43 ifcol.cocci 7 High rate of false positives
44 irqf_oneshot.cocci 6
45 noderef.cocci 2
46 of_table.cocci 2
47 orplus.cocci 4 Beware of false positives
48 returnvar.cocci 7
49 semicolon.cocci 8
50 warn.cocci 4
51 badzero.cocci 6
52 deref_null.cocci 4 Beware of false positives
53 eno.cocci 2
54 kmerr.cocci 2
55 doublebitand.cocci 7 Beware of false positives
56 doubletest.cocci 7 Beware of false positives
57 odd_ptr_err.cocci 4
58 unsigned_lesser_than_zero.cocci 3

Merge tables and aggregate results


In [17]:
#df

In [18]:
# Reference: <https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.merge.html>
#
# See also tutorial: Merge and Join dataframes with Panda
# <https://www.shanelynn.ie/merge-join-dataframes-python-pandas-index-1/>

# Merge data5_relevance and df on filename
result = pd.merge(data5_relevance,
                  df,
                  on='filename')

# result

In [19]:
def get_aggregate_LWE(dataframe, index):
    """Support function for df_append_aggregate_LWE"""
    # print('DEBUG: get_aggregate_LWE(dataframe={0}, index={1})'.format(id(dataframe), index))
    try:
        this_l = int(dataframe['L'][index])
    except ValueError:
        this_l = '?'
    # print('DEBUG: this_l=', this_l)
    try:
        this_w = int(dataframe['W'][index])
    except ValueError:
        this_w = '?'
    # print('DEBUG: this_w=', this_w)
    try:
        this_e = int(dataframe['E'][index])
    except ValueError:
        this_e = '?'
    # print('DEBUG: this_e=', this_e)
    s = "{0}/{1}/{2}".format(this_l, this_w, this_e)
    return s

#int(data2_full['W'][1])
#print(get_aggregate_LWE(data2_full, 0))
#print(get_aggregate_LWE(data2_full, 3))
#data2_full.count()

def df_append_aggregate_LWE(dataframe, col_name):
    """Append to dataframe a new column col_name
    with strings 'L/W/E' created as the aggregation of columns L, W and E"""
    col = []
    for k in dataframe.index:
        #print("DEBUG: k=", k)
        entry = get_aggregate_LWE(dataframe, k)
        #print("DEBUG: entry=", entry)
        col.append(entry)
    #print("DEBUG: col=", col)
    dataframe[col_name] = col

         
# Create a new column 'LWE_full' with the aggregation of columns 'L'/'W'/'E'
df_append_aggregate_LWE(data2_full, "LWE_full")
#data2_full.head()
data2_full
#
# Join data2_full on filename
result = pd.merge(result, data2_full, on='filename')
#result

# Create a new column 'LWE_minimal' with the aggregation of columns 'L'/'W'/'E'
df_append_aggregate_LWE(data3_minimized, "LWE_minimized")
#data3_minimized.head()
data3_minimized
#
# Join data3_minimize on filename
result = pd.merge(result, data3_minimized, on='filename')
#result

In [20]:
#result.sort_values(by='rel')
#x = result.sort_values(by='rel')[['filename', 'rel', 'notes', 'path', 'confidence']] # , 'description'
x = result.sort_values(by='rel')[['path', 'filename', 'rel', 'confidence', 'LWE_full', 'LWE_minimized', 'notes']] # , 'description'

#type(x)    # ==> pandas.core.frame.DataFrame
#x

In [21]:
# See <https://stackoverflow.com/questions/25698448/how-to-embed-html-into-ipython-output>
from IPython.core.display import display, HTML
display(HTML('<h3>Result: Prioritized list of Coccinelle scripts</h3>'))
display(HTML(x.to_html()))


Result: Prioritized list of Coccinelle scripts

path filename rel confidence LWE_full LWE_minimized notes
22 scripts/coccinelle/free kfreeaddr.cocci 1 High 0/0/0 0/0/0
42 scripts/coccinelle/misc ifaddr.cocci 1 High 13/0/13 0/0/0
40 scripts/coccinelle/misc cstptr.cocci 1 High 4/0/4 1/0/1
54 scripts/coccinelle/null kmerr.cocci 2 High 13/0/0 0/0/0
53 scripts/coccinelle/null eno.cocci 2 High 1/0/1 0/0/0
18 scripts/coccinelle/free devm_free.cocci 2 Moderate 21/21/0 0/0/0 Beware of false positives
31 scripts/coccinelle/locks flags.cocci 2 High 1/0/1 0/0/0
46 scripts/coccinelle/misc of_table.cocci 2 Medium 3/0/0 0/0/0
45 scripts/coccinelle/misc noderef.cocci 2 High 6/0/6 1/0/1
21 scripts/coccinelle/free kfree.cocci 2 Moderate 63/0/63 2/0/2 Beware of false positives
24 scripts/coccinelle/free device_node_continue.cocci 3 High 3/0/3 0/0/0
28 scripts/coccinelle/iterators use_after_iter.cocci 3 Moderate 27/0/27 1/0/1 Beware of false positives
58 scripts/coccinelle/tests unsigned_lesser_than_zero.cocci 3 Average 18/18/0 0/0/0
3 scripts/coccinelle/api d_find_alias.cocci 3 Moderate 0/0/0 0/0/0
33 scripts/coccinelle/misc array_size.cocci 4 High 8/8/0 0/0/0
47 scripts/coccinelle/misc orplus.cocci 4 Moderate 223/223/0 5/5/0 Beware of false positives
57 scripts/coccinelle/tests odd_ptr_err.cocci 4 High 4/0/0 0/0/0
50 scripts/coccinelle/misc warn.cocci 4 High 18/0/0 5/0/0
27 scripts/coccinelle/iterators list_entry_update.cocci 4 High 4/0/0 1/0/0
52 scripts/coccinelle/null deref_null.cocci 4 Moderate 32/0/32 0/0/0 Beware of false positives
23 scripts/coccinelle/free pci_free_consistent.cocci 4 Moderate 7/0/7 0/0/0
34 scripts/coccinelle/misc badty.cocci 4 Moderate 4/4/0 0/0/0 Beware of false positives
39 scripts/coccinelle/misc cond_no_effect.cocci 4 Moderate 95/95/0 1/1/0 Developed by Nicholas Mc Guire, OSADL
29 scripts/coccinelle/locks call_kern.cocci 4 Moderate 6/0/6 3/0/3
13 scripts/coccinelle/api resource_size.cocci 4 High 10/10/0 0/0/0
11 scripts/coccinelle/api pm_runtime.cocci 4 Medium 0/0/0 0/0/0
8 scripts/coccinelle/api memdup.cocci 4 High 0/0/0 0/0/0
20 scripts/coccinelle/free iounmap.cocci 4 Moderate 5/0/5 1/0/1 Beware of false positives
7 scripts/coccinelle/api kstrdup.cocci 4 High 0/0/0 0/0/0
9 scripts/coccinelle/api memdup_user.cocci 4 High 9/9/0 4/4/0
16 scripts/coccinelle/api vma_pages.cocci 4 High 7/7/0 2/2/0
17 scripts/coccinelle/free clk_put.cocci 4 Medium 0/0/0 0/0/0
15 scripts/coccinelle/api simple_open.cocci 4 High 1/1/0 0/0/0
30 scripts/coccinelle/locks double_lock.cocci 5 Moderate 34/0/0 11/0/0 Beware of false positives
6 scripts/coccinelle/api err_cast.cocci 5 High 2/2/0 0/0/0
12 scripts/coccinelle/api ptr_ret.cocci 5 High 55/55/0 2/2/0
26 scripts/coccinelle/iterators ifnull.cocci 5 Moderate 27/0/27 5/0/5 Beware of false positives
1 scripts/coccinelle/api/alloc kzalloc-simple.cocci 5 High 4/4/0 0/0/0
25 scripts/coccinelle/iterators fen.cocci 5 High 2/0/2 0/0/0
10 scripts/coccinelle/api platform_no_drv_owner.cocci 5 High 20/0/0 0/0/0
0 scripts/coccinelle/api/alloc alloc_cast.cocci 5 High 13/0/0 2/0/0
44 scripts/coccinelle/misc irqf_oneshot.cocci 6 Moderate 52/0/52 2/0/2
4 scripts/coccinelle/debugfs debugfs_simple_attr.cocci 6 N.A. 83/83/0 0/0/0
5 scripts/coccinelle/api drm-get-put.cocci 6 High 48/48/0 0/0/0
51 scripts/coccinelle/null badzero.cocci 6 High 119/119/0 0/0/0
32 scripts/coccinelle/locks mini_lock.cocci 6 Moderate 288/0/0 45/0/0 Beware of false positives
14 scripts/coccinelle/api setup_timer.cocci 6 High 222/0/0 11/0/0
37 scripts/coccinelle/misc boolreturn.cocci 6 High 113/113/0 4/4/0 Does not affect code behaviour
56 scripts/coccinelle/tests doubletest.cocci 7 Moderate 94/0/0 2/0/0 Beware of false positives
55 scripts/coccinelle/tests doublebitand.cocci 7 Moderate 79/0/0 20/0/0 Beware of false positives
19 scripts/coccinelle/free ifnullfree.cocci 7 N.A. 124/124/0 14/14/0
48 scripts/coccinelle/misc returnvar.cocci 7 Moderate 254/0/0 8/0/0
43 scripts/coccinelle/misc ifcol.cocci 7 Low 110/0/0 3/0/0 High rate of false positives
41 scripts/coccinelle/misc doubleinit.cocci 7 Low 159/0/0 0/0/0 High rate of false positives
2 scripts/coccinelle/api/alloc pool_zalloc-simple.cocci 8 N.A. 22/22/0 5/0/0
38 scripts/coccinelle/misc bugon.cocci 8 High 608/304/0 92/46/0
36 scripts/coccinelle/misc boolinit.cocci 8 High 1775/?/? 37/37/0
49 scripts/coccinelle/misc semicolon.cocci 8 Moderate 327/0/0 4/0/0
35 scripts/coccinelle/misc boolconv.cocci 9 N.A. ?/?/? 2/2/0 Cleanup code only

In [22]:
# Collect main results into an Excel spreadsheet
# See <https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_excel.html>

# TODO: Trying to create file "output.xlsx" returns ModuleNotFoundError
writer = pd.ExcelWriter('evaluate_sel_cocci.xls')
#x.to_excel?

# Write sheets
x.to_excel(writer, 'Summary')
df.to_excel(writer, 'Cocci scripts in linux 4.14-rc6')
data5_relevance.to_excel(writer, 'Estimated SEL relevance')
data2_full.to_excel(writer, 'report_linux_4_14_rc6_fulltree')
data3_minimized.to_excel(writer, 'report_linux_4_4_50_minimized')

writer.save()