In [1]:
# Import all needed libraries
import json
import calendar
import random
import os
import sys
from datetime import date, timedelta
import faker
import numpy as np
import pandas as pd
from pandas import DataFrame, read_csv
from delorean import parse
import matplotlib
# Enable inline plotting
%matplotlib inline
In [2]:
# Record the interpreter and library versions this notebook was run with
print(f'Python version {sys.version}')
print(f'Pandas version {pd.__version__}')
print(f'Matplotlib version {matplotlib.__version__}')
In [3]:
# Dictionary listing all Semantic Patches in linux-4.14-rc6
# TODO: Should refactor into file 'cocci_linux_4_14_rc6.json'
# Catalogue of the semantic patches: one dict per script with its directory
# ('path'), file name, the confidence stated for it, and a summary description.
# 'filename' is the key the report tables further down are joined on.
cocci_linux_4_14_rc6 = {
    'info': {
        'repository': 'linux-4.14-rc6'
    },
    'scripts': [
        {
            'path': 'scripts/coccinelle/api/alloc',
            'filename': 'alloc_cast.cocci',
            'confidence': 'High',
            'description': "Remove casting the values returned by memory allocation functions like "
                           "kmalloc, kzalloc, kmem_cache_alloc, kmem_cache_zalloc etc. "
                           "This makes an effort to find cases of casting of values returned "
                           "by kmalloc, kzalloc, kcalloc, kmem_cache_alloc, kmem_cache_zalloc, "
                           "kmem_cache_alloc_node, kmalloc_node and kzalloc_node and removes the casting "
                           "as it is not required. The result in the patch case may need some reformatting."
        },
        {
            'path': 'scripts/coccinelle/api/alloc',
            'filename': 'kzalloc-simple.cocci',
            'confidence': 'High',
            'description': "Use kzalloc rather than kmalloc followed by memset with 0. "
                           "This considers some simple cases that are common and easy to validate. "
                           "Note in particular that there are no ...s in the rule, so all of "
                           "the matched code has to be contiguous"
        },
        {
            'path': 'scripts/coccinelle/api/alloc',
            'filename': 'pool_zalloc-simple.cocci',
            'confidence': 'N.A.',
            'description': "Use *_pool_zalloc rather than *_pool_alloc followed by memset with 0"
        },
        # TODO: Complete fields for all the 59 entries
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'd_find_alias.cocci',
            'confidence': 'Moderate',
            'description': "Make sure calls to d_find_alias() have a corresponding call to dput()."
        },
        {
            # Fixed: this script lives below api/, next to the other API checks.
            'path': 'scripts/coccinelle/api/debugfs',
            'filename': 'debugfs_simple_attr.cocci',
            'confidence': 'N.A.',
            'description': "Use DEFINE_DEBUGFS_ATTRIBUTE rather than DEFINE_SIMPLE_ATTRIBUTE "
                           "for debugfs files. "
                           "Rationale: DEFINE_SIMPLE_ATTRIBUTE + debugfs_create_file() imposes some "
                           "significant overhead as compared to DEFINE_DEBUGFS_ATTRIBUTE + debugfs_create_file_unsafe()."
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'drm-get-put.cocci',
            'confidence': 'High',
            'description': "Use drm_*_get() and drm_*_put() helpers instead of drm_*_reference() "
                           "and drm_*_unreference() helpers."
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'err_cast.cocci',
            'confidence': 'High',
            'description': "Use ERR_CAST inlined function instead of ERR_PTR(PTR_ERR(...))"
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'kstrdup.cocci',
            'confidence': 'High',
            'description': "Use kstrdup rather than duplicating its implementation"
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'memdup.cocci',
            'confidence': 'High',
            'description': "Use kmemdup rather than duplicating its implementation"
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'memdup_user.cocci',
            'confidence': 'High',
            'description': "Use memdup_user rather than duplicating its implementation. "
                           "This is a little bit restricted to reduce false positives"
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'platform_no_drv_owner.cocci',
            'confidence': 'High',
            'description': "Remove .owner field if calls are used which set it automatically"
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'pm_runtime.cocci',
            'confidence': 'Medium',
            'description': "Make sure pm_runtime_* calls does not use unnecessary IS_ERR_VALUE"
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'ptr_ret.cocci',
            'confidence': 'High',
            'description': "Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR"
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'resource_size.cocci',
            'confidence': 'High',
            'description': "Use resource_size function on resource object instead of explicit computation."
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'setup_timer.cocci',
            'confidence': 'High',
            'description': "Use setup_timer function instead of initializing timer with the function and data fields"
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'simple_open.cocci',
            'confidence': 'High',
            'description': "Remove an open coded simple_open() function "
                           "and replace file operations references to the function with simple_open() instead."
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'vma_pages.cocci',
            'confidence': 'High',
            'description': "Use vma_pages function on vma object instead of explicit computation."
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'clk_put.cocci',
            'confidence': 'Medium',
            'description': "Find missing clk_puts. This only signals a missing clk_put when "
                           "there is a clk_put later in the same function. "
                           "False positives can be due to loops."
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'devm_free.cocci',
            'confidence': 'Moderate',
            'description': "Find uses of standard freeing functions on values allocated using devm_ functions. "
                           "Values allocated using the devm_functions are freed when the device is detached, "
                           "and thus the use of the standard freeing function would cause a double free. "
                           "See Documentation/driver-model/devres.txt for more information. "
                           "A difficulty of detecting this problem is that the standard freeing function might "
                           "be called from a different function than the one containing the allocation function. "
                           "It is thus necessary to make the connection between the allocation function "
                           "and the freeing function. "
                           "Here this is done using the specific argument text, which is prone to false positives. "
                           "There is no rule for the request_region and request_mem_region variants because "
                           "this heuristic seems to be a bit less reliable in these cases."
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'ifnullfree.cocci',
            'confidence': 'N.A.',
            'description': "NULL check before some freeing functions is not needed. "
                           "Based on checkpatch warning \"kfree(NULL) is safe this check is probably "
                           "not required\" and kfreeaddr.cocci"
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'iounmap.cocci',
            'confidence': 'Moderate',
            'description': "Find missing iounmaps. "
                           "This only signals a missing iounmap when there is an iounmap later in the same function. "
                           "False positives can be due to loops."
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'kfree.cocci',
            'confidence': 'Moderate',
            'description': "Find a use after free. "
                           "Values of variables may imply that some execution paths are not possible, resulting in false positives. "
                           "Another source of false positives are macros such as "
                           "SCTP_DBG_OBJCNT_DEC that do not actually evaluate their argument"
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'kfreeaddr.cocci',
            'confidence': 'High',
            'description': "Free of a structure field"
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'pci_free_consistent.cocci',
            'confidence': 'Moderate',
            'description': "Find missing pci_free_consistent for every pci_alloc_consistent."
        },
        {
            # Fixed: this is an iterator check, not one of the free/ scripts.
            'path': 'scripts/coccinelle/iterators',
            'filename': 'device_node_continue.cocci',
            'confidence': 'High',
            'description': "Device node iterators put the previous value of the index variable, "
                           "so an explicit put causes a double put."
        },
        {
            'path': 'scripts/coccinelle/iterators',
            'filename': 'fen.cocci',
            'confidence': 'High',
            'description': "These iterators only exit normally when the loop cursor is NULL, "
                           "so there is no point to call of_node_put on the final value."
        },
        {
            'path': 'scripts/coccinelle/iterators',
            # NOTE(review): the upstream script appears to be named 'itnull.cocci' -
            # confirm against the kernel tree. Kept as-is because the report and
            # relevance tables are joined on this exact filename.
            'filename': 'ifnull.cocci',
            'confidence': 'Moderate',
            'description': "Many iterators have the property that the first argument "
                           "is always bound to a real list element, never NULL. "
                           "False positives arise for some iterators that do not have this property, "
                           "or in cases when the loop cursor is reassigned. The latter should only "
                           "happen when the matched code is on the way to a loop exit (break, goto, or return)."
        },
        {
            'path': 'scripts/coccinelle/iterators',
            'filename': 'list_entry_update.cocci',
            'confidence': 'High',
            'description': "list_for_each_entry uses its first argument to get "
                           "from one element of the list to the next, so it is usually not a good idea to reassign it. "
                           "The first rule finds such a reassignment and the second rule checks that there is "
                           "a path from the reassignment back to the top of the loop."
        },
        {
            'path': 'scripts/coccinelle/iterators',
            'filename': 'use_after_iter.cocci',
            'confidence': 'Moderate',
            'description': "If list_for_each_entry, etc complete a traversal of the list, "
                           "the iterator variable ends up pointing to an address at an offset from the list head, "
                           "and not a meaningful structure. Thus this value should not be used after "
                           "the end of the iterator. "
                           "False positives arise when there is a goto in the iterator and the reported reference "
                           "is at the label of this goto. Some flag tests may also cause a report to be a false positive."
        },
        {
            'path': 'scripts/coccinelle/locks',
            'filename': 'call_kern.cocci',
            'confidence': 'Moderate',
            'description': "Find functions that refer to GFP_KERNEL but are called with locks held. "
                           "The proposed change of converting the GFP_KERNEL is not necessarily the correct one. "
                           "It may be desired to unlock the lock, or to not call the function under the lock in the first place."
        },
        {
            'path': 'scripts/coccinelle/locks',
            'filename': 'double_lock.cocci',
            'confidence': 'Moderate',
            'description': "Find double locks. False positives may occur when some paths cannot occur at execution, "
                           "due to the values of variables, and when there is an intervening function call that releases the lock."
        },
        {
            'path': 'scripts/coccinelle/locks',
            'filename': 'flags.cocci',
            'confidence': 'High',
            'description': "Find nested lock+irqsave functions that use the same flags variables"
        },
        {
            'path': 'scripts/coccinelle/locks',
            'filename': 'mini_lock.cocci',
            'confidence': 'Moderate',
            'description': "Find missing unlocks. "
                           "This semantic match considers the specific case where the unlock is missing from an if branch, "
                           "and there is a lock before the if and an unlock after the if. "
                           "False positives are due to cases where the if branch represents a case where the function "
                           "is supposed to exit with the lock held, or where there is some preceding function call "
                           "that releases the lock."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'array_size.cocci',
            'confidence': 'High',
            'description': "Use ARRAY_SIZE instead of dividing sizeof array with sizeof an element. "
                           "This makes an effort to find cases where ARRAY_SIZE can be used such as where "
                           "there is a division of sizeof the array by the sizeof its first element or by any "
                           "indexed element or the element type. It replaces the division of the two sizeofs by ARRAY_SIZE."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'badty.cocci',
            'confidence': 'Moderate',
            # Fixed: the opening sentence had been copied from array_size.cocci.
            'description': "Correct the size argument to alloc functions. "
                           "This makes an effort to find cases where the argument to sizeof is wrong in memory "
                           "allocation functions by checking the type of the allocated memory when it is a double pointer "
                           "and ensuring the sizeof argument takes a pointer to the memory being allocated. "
                           "There are false positives in cases the sizeof argument is not used in constructing the return value. "
                           "The result may need some reformatting."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'boolconv.cocci',
            'confidence': 'N.A.',
            'description': "Remove unneeded conversion to bool. "
                           "Relational and logical operators evaluate to bool, explicit conversion is overly verbose and unneeded"
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'boolinit.cocci',
            'confidence': 'High',
            'description': "Bool initializations should use true and false. "
                           "Bool tests don't need comparisons. "
                           "Based on contributions from Joe Perches, Rusty Russell and Bruce W Allan."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'boolreturn.cocci',
            'confidence': 'High',
            'description': "Return statements in functions returning bool should use true/false instead of 1/0."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'bugon.cocci',
            'confidence': 'High',
            'description': "Use BUG_ON instead of a if condition followed by BUG. "
                           "This makes an effort to find cases where BUG() follows an if condition "
                           "on an expression and replaces the if condition and BUG() with a BUG_ON "
                           "having the conditional expression of the if statement as argument."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'cond_no_effect.cocci',
            'confidence': 'Moderate',
            'description': "Find conditions where if and else branch are functionally identical. "
                           "There can be false positives in cases where the positional information is used (as with lockdep) "
                           "or where the identity is a placeholder for not yet handled cases. "
                           "Unfortunately there also seems to be a tendency to use the last if else/else "
                           "as a \"default behavior\" - which some might consider a legitimate coding pattern. "
                           "From discussion on kernelnewbies though it seems that this is not really an accepted pattern "
                           "and if at all it would need to be commented"
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'cstptr.cocci',
            'confidence': 'High',
            'description': "PTR_ERR should be applied before its argument is reassigned, typically to NULL"
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'doubleinit.cocci',
            'confidence': 'Low',
            'description': "Find duplicate field initializations. "
                           "This has a high rate of false positives due to #ifdefs, which Coccinelle is not aware of "
                           "in a structure initialization."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'ifaddr.cocci',
            'confidence': 'High',
            'description': "The address of a variable or field is likely always to be non-zero."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'ifcol.cocci',
            'confidence': 'Low',
            'description': "Find confusingly indented code in or after an if. An if branch should be indented. "
                           "The code following an if should not be indented. Sometimes, code after an if that is indented "
                           "is actually intended to be part of the if branch. "
                           "This has a high rate of false positives, because Coccinelle's column calculation "
                           "does not distinguish between spaces and tabs, so code that is not visually aligned "
                           "may be considered to be in the same column."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'irqf_oneshot.cocci',
            'confidence': 'Moderate',
            'description': "Since commit 1c6c69525b40 (genirq: Reject bogus threaded irq requests) "
                           "threaded IRQs without a primary handler need to be requested with IRQF_ONESHOT, "
                           "otherwise the request will fail. "
                           "So pass the IRQF_ONESHOT flag in this case."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'noderef.cocci',
            'confidence': 'High',
            'description': "sizeof when applied to a pointer typed expression gives the size of the pointer"
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'of_table.cocci',
            'confidence': 'Medium',
            'description': "Make sure (of/i2c/platform)_device_id tables are NULL terminated"
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'orplus.cocci',
            'confidence': 'Moderate',
            'description': "Check for constants that are added but are used elsewhere as bitmasks. "
                           "The results should be checked manually to ensure that the nonzero bits "
                           "in the two constants are actually disjoint."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'returnvar.cocci',
            'confidence': 'Moderate',
            'description': "Remove unneeded variable used to store return value."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'semicolon.cocci',
            'confidence': 'Moderate',
            'description': "Remove unneeded semicolon."
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'warn.cocci',
            'confidence': 'High',
            'description': "Use WARN(1,...) rather than printk followed by WARN_ON(1)"
        },
        {
            'path': 'scripts/coccinelle/null',
            'filename': 'badzero.cocci',
            'confidence': 'High',
            'description': "Compare pointer-typed values to NULL rather than 0. "
                           "This makes an effort to choose between !x and x == NULL. "
                           "!x is used if it has previously been used with the function used to initialize x. "
                           "This relies on type information. "
                           "More type information can be obtained using the option -all_includes and the option -I "
                           "to specify an include path."
        },
        {
            'path': 'scripts/coccinelle/null',
            'filename': 'deref_null.cocci',
            'confidence': 'Moderate',
            'description': "A variable is dereferenced under a NULL test. "
                           "Even though it is known to be NULL."
        },
        {
            'path': 'scripts/coccinelle/null',
            'filename': 'eno.cocci',
            'confidence': 'High',
            'description': "The various basic memory allocation functions don't return ERR_PTR"
        },
        {
            'path': 'scripts/coccinelle/null',
            'filename': 'kmerr.cocci',
            'confidence': 'High',
            'description': "This semantic patch looks for kmalloc etc that are not followed by a NULL check. "
                           "It only gives a report in the case where there is some error handling code later "
                           "in the function, which may be helpful in determining what the error handling code "
                           "for the call to kmalloc etc should be."
        },
        {
            'path': 'scripts/coccinelle/tests',
            'filename': 'doublebitand.cocci',
            'confidence': 'Moderate',
            'description': "Find bit operations that include the same argument more than once. "
                           "One source of false positives is when the argument performs a side effect. "
                           "Another source of false positives is when a neutral value such as 0 for | is used "
                           "to indicate no information, to maintain the same structure as other similar expressions"
        },
        {
            'path': 'scripts/coccinelle/tests',
            'filename': 'doubletest.cocci',
            'confidence': 'Moderate',
            'description': "Find &&/|| operations that include the same argument more than once. "
                           "A common source of false positives is when the argument performs a side effect."
        },
        {
            'path': 'scripts/coccinelle/tests',
            'filename': 'odd_ptr_err.cocci',
            'confidence': 'High',
            'description': "PTR_ERR should access the value just tested by IS_ERR. "
                           "There can be false positives in the patch case, where it is the call to IS_ERR that is wrong."
        },
        {
            'path': 'scripts/coccinelle/tests',
            'filename': 'unsigned_lesser_than_zero.cocci',
            'confidence': 'Average',
            'description': "Unsigned expressions cannot be lesser than zero. "
                           "Presence of comparisons 'unsigned (<|<=|>|>=) 0' often indicates a bug, usually wrong type of variable. "
                           "To reduce number of false positives following tests have been added: "
                           "- parts of range checks are skipped, eg. \"if (u < 0 || u > 15) ...\", developers prefer "
                           "to keep such code, "
                           # Fixed: a trailing space was missing here, gluing two bullets together.
                           "- comparisons \"<= 0\" and \"> 0\" are performed only on results of signed functions/macros, "
                           "- hardcoded list of signed functions/macros with always non-negative result is used "
                           "to avoid false positives difficult to detect by other ways"
        },
    ]
}
# cocci_linux_4_14_rc6
In [4]:
#print(cocci_linux_4_14_rc6)
# Number of catalogued semantic patches (the TODO notes in this notebook expect 59)
len(cocci_linux_4_14_rc6['scripts'])
Out[4]:
In [5]:
# Turn the catalogue into a DataFrame: one row per semantic patch, one column
# per dictionary key ('path', 'filename', 'confidence', 'description').
data = cocci_linux_4_14_rc6['scripts']
df = DataFrame(data) # , index=cocci_linux_4_14_rc6['scripts']['filename']
# Inspection helpers used while developing (left disabled):
#df.count()
#df.describe()
#json.dumps(data)
#print(df.index)
#print()
#print(df.filename)
#print()
#df.head()
In [6]:
# Rearrange columns
# Put the catalogue columns into reading order: location first, free text last
cols = ['path', 'filename', 'confidence', 'description']
df = df.loc[:, cols]
In [7]:
# Notice that text of column "description" as shown above is truncated,
# however the cell inside the DataFrame contains the full value
#df.iloc[0]['description']
In [8]:
# Show the catalogue with untruncated "description" text.
# pandas shortens long cells by default, so the display limits are raised first.
# See <https://stackoverflow.com/questions/23388810/ipython-notebook-output-cell-is-truncating-contents-of-my-list>
# See <http://pandas.pydata.org/pandas-docs/stable/options.html>
from pandas import DataFrame
from IPython.display import HTML
pd.set_option('display.max_colwidth', 2000)
pd.set_option('display.max_seq_items', 200)
HTML(df.to_html())
Out[8]:
In [9]:
# Export the dataframe to an Excel file
#df.to_excel('test.xls')
In [10]:
# Report against linux-4.14-rc6 full-tree
# Per-script L/W/E counters of the full-tree run, keyed by 'filename'.
# np.nan marks counters that are not available for a script.
# (np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.)
report_linux_4_14_rc6_fulltree = {
    'info': {
        'note': """Report against linux-4.14-rc6 full-tree""",
        'script_start': 'Thu Oct 5 13:11:29 CEST 2017',
        'src_sha': 'd81fa669e3de7eb8a631d7d95dac5fbcb2bf9d4e',
    },
    'statistics': [
        {'filename': 'alloc_cast.cocci', 'L': 13, 'W': 0, 'E': 0},
        {'filename': 'kzalloc-simple.cocci', 'L': 4, 'W': 4, 'E': 0},
        {'filename': 'pool_zalloc-simple.cocci', 'L': 22, 'W': 22, 'E': 0},
        {'filename': 'd_find_alias.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'debugfs_simple_attr.cocci', 'L': 83, 'W': 83, 'E': 0},
        {'filename': 'drm-get-put.cocci', 'L': 48, 'W': 48, 'E': 0},
        {'filename': 'err_cast.cocci', 'L': 2, 'W': 2, 'E': 0},
        {'filename': 'kstrdup.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'memdup.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'memdup_user.cocci', 'L': 9, 'W': 9, 'E': 0},
        {'filename': 'platform_no_drv_owner.cocci', 'L': 20, 'W': 0, 'E': 0},
        {'filename': 'pm_runtime.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'ptr_ret.cocci', 'L': 55, 'W': 55, 'E': 0},
        {'filename': 'resource_size.cocci', 'L': 10, 'W': 10, 'E': 0},
        {'filename': 'setup_timer.cocci', 'L': 222, 'W': 0, 'E': 0},
        {'filename': 'simple_open.cocci', 'L': 1, 'W': 1, 'E': 0},
        {'filename': 'vma_pages.cocci', 'L': 7, 'W': 7, 'E': 0},
        {'filename': 'clk_put.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'devm_free.cocci', 'L': 21, 'W': 21, 'E': 0},
        {'filename': 'ifnullfree.cocci', 'L': 124, 'W': 124, 'E': 0},
        {'filename': 'iounmap.cocci', 'L': 5, 'W': 0, 'E': 5},
        {'filename': 'kfree.cocci', 'L': 63, 'W': 0, 'E': 63},
        {'filename': 'kfreeaddr.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'pci_free_consistent.cocci', 'L': 7, 'W': 0, 'E': 7},
        {'filename': 'device_node_continue.cocci', 'L': 3, 'W': 0, 'E': 3},
        {'filename': 'fen.cocci', 'L': 2, 'W': 0, 'E': 2},
        {'filename': 'ifnull.cocci', 'L': 27, 'W': 0, 'E': 27},
        {'filename': 'list_entry_update.cocci', 'L': 4, 'W': 0, 'E': 0},
        {'filename': 'use_after_iter.cocci', 'L': 27, 'W': 0, 'E': 27},
        {'filename': 'call_kern.cocci', 'L': 6, 'W': 0, 'E': 6},
        {'filename': 'double_lock.cocci', 'L': 34, 'W': 0, 'E': 0},
        {'filename': 'flags.cocci', 'L': 1, 'W': 0, 'E': 1},
        {'filename': 'mini_lock.cocci', 'L': 288, 'W': 0, 'E': 0},
        {'filename': 'array_size.cocci', 'L': 8, 'W': 8, 'E': 0},
        {'filename': 'badty.cocci', 'L': 4, 'W': 4, 'E': 0},
        {'filename': 'boolconv.cocci', 'L': np.nan, 'W': np.nan, 'E': np.nan},
        {'filename': 'boolinit.cocci', 'L': 1775, 'W': np.nan, 'E': np.nan},
        {'filename': 'boolreturn.cocci', 'L': 113, 'W': 113, 'E': 0},
        {'filename': 'bugon.cocci', 'L': 608, 'W': 304, 'E': 0},
        {'filename': 'cond_no_effect.cocci', 'L': 95, 'W': 95, 'E': 0},
        {'filename': 'cstptr.cocci', 'L': 4, 'W': 0, 'E': 4},
        {'filename': 'doubleinit.cocci', 'L': 159, 'W': 0, 'E': 0},
        {'filename': 'ifaddr.cocci', 'L': 13, 'W': 0, 'E': 13},
        {'filename': 'ifcol.cocci', 'L': 110, 'W': 0, 'E': 0},
        {'filename': 'irqf_oneshot.cocci', 'L': 52, 'W': 0, 'E': 52},
        {'filename': 'noderef.cocci', 'L': 6, 'W': 0, 'E': 6},
        {'filename': 'of_table.cocci', 'L': 3, 'W': 0, 'E': 0},
        {'filename': 'orplus.cocci', 'L': 223, 'W': 223, 'E': 0},
        {'filename': 'returnvar.cocci', 'L': 254, 'W': 0, 'E': 0},
        {'filename': 'semicolon.cocci', 'L': 327, 'W': 0, 'E': 0},
        {'filename': 'warn.cocci', 'L': 18, 'W': 0, 'E': 0},
        {'filename': 'badzero.cocci', 'L': 119, 'W': 119, 'E': 0},
        {'filename': 'deref_null.cocci', 'L': 32, 'W': 0, 'E': 32},
        {'filename': 'eno.cocci', 'L': 1, 'W': 0, 'E': 1},
        {'filename': 'kmerr.cocci', 'L': 13, 'W': 0, 'E': 0},
        {'filename': 'doublebitand.cocci', 'L': 79, 'W': 0, 'E': 0},
        {'filename': 'doubletest.cocci', 'L': 94, 'W': 0, 'E': 0},
        {'filename': 'odd_ptr_err.cocci', 'L': 4, 'W': 0, 'E': 0},
        {'filename': 'unsigned_lesser_than_zero.cocci', 'L': 18, 'W': 18, 'E': 0},
    ]
}
In [11]:
# Full-tree counters as a DataFrame, columns in filename / L / W / E order
data2_full = DataFrame(report_linux_4_14_rc6_fulltree['statistics']).loc[:, ['filename', 'L', 'W', 'E']]
data2_full
Out[11]:
In [12]:
#data2_full.plot()
#data2_full[['L', 'W', 'E']].plot()
#data2_full[['filename','L', 'W', 'E']].plot(kind='bar')
#len(data2_full['filename'].unique())
In [13]:
# Report against linux-4.4.50 minimized-tree
# Per-script L/W/E counters of the minimized-tree run, keyed by 'filename'.
report_linux_4_4_50_minimizedtree = {
    'info': {
        # Fixed typo in the note text ("minimzed" -> "minimized").
        'note': """Report against linux-4.4.50 minimized-tree""",
        'script_start': 'Mon Oct 23 16:29:52 IST 2017',
        'cocci_sha': '9e66317d3c92ddaab330c125dfe9d06eee268aff',
        'src_sha': '90dcab23bbc81fbfa25dfdb91d4ce974a69bd210',
    },
    'statistics': [
        {'filename': 'alloc_cast.cocci', 'L': 2, 'W': 0, 'E': 0},
        {'filename': 'kzalloc-simple.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'pool_zalloc-simple.cocci', 'L': 5, 'W': 0, 'E': 0},
        {'filename': 'd_find_alias.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'debugfs_simple_attr.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'drm-get-put.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'err_cast.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'kstrdup.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'memdup.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'memdup_user.cocci', 'L': 4, 'W': 4, 'E': 0},
        {'filename': 'platform_no_drv_owner.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'pm_runtime.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'ptr_ret.cocci', 'L': 2, 'W': 2, 'E': 0},
        {'filename': 'resource_size.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'setup_timer.cocci', 'L': 11, 'W': 0, 'E': 0},
        {'filename': 'simple_open.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'vma_pages.cocci', 'L': 2, 'W': 2, 'E': 0},
        {'filename': 'clk_put.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'devm_free.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'ifnullfree.cocci', 'L': 14, 'W': 14, 'E': 0},
        {'filename': 'iounmap.cocci', 'L': 1, 'W': 0, 'E': 1},
        {'filename': 'kfree.cocci', 'L': 2, 'W': 0, 'E': 2},
        {'filename': 'kfreeaddr.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'pci_free_consistent.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'device_node_continue.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'fen.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'ifnull.cocci', 'L': 5, 'W': 0, 'E': 5},
        {'filename': 'list_entry_update.cocci', 'L': 1, 'W': 0, 'E': 0},
        {'filename': 'use_after_iter.cocci', 'L': 1, 'W': 0, 'E': 1},
        {'filename': 'call_kern.cocci', 'L': 3, 'W': 0, 'E': 3},
        {'filename': 'double_lock.cocci', 'L': 11, 'W': 0, 'E': 0},
        {'filename': 'flags.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'mini_lock.cocci', 'L': 45, 'W': 0, 'E': 0},
        {'filename': 'array_size.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'badty.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'boolconv.cocci', 'L': 2, 'W': 2, 'E': 0},
        {'filename': 'boolinit.cocci', 'L': 37, 'W': 37, 'E': 0},
        {'filename': 'boolreturn.cocci', 'L': 4, 'W': 4, 'E': 0},
        {'filename': 'bugon.cocci', 'L': 92, 'W': 46, 'E': 0},
        {'filename': 'cond_no_effect.cocci', 'L': 1, 'W': 1, 'E': 0},
        {'filename': 'cstptr.cocci', 'L': 1, 'W': 0, 'E': 1},
        {'filename': 'doubleinit.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'ifaddr.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'ifcol.cocci', 'L': 3, 'W': 0, 'E': 0},
        {'filename': 'irqf_oneshot.cocci', 'L': 2, 'W': 0, 'E': 2},
        {'filename': 'noderef.cocci', 'L': 1, 'W': 0, 'E': 1},
        {'filename': 'of_table.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'orplus.cocci', 'L': 5, 'W': 5, 'E': 0},
        {'filename': 'returnvar.cocci', 'L': 8, 'W': 0, 'E': 0},
        {'filename': 'semicolon.cocci', 'L': 4, 'W': 0, 'E': 0},
        {'filename': 'warn.cocci', 'L': 5, 'W': 0, 'E': 0},
        {'filename': 'badzero.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'deref_null.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'eno.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'kmerr.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'doublebitand.cocci', 'L': 20, 'W': 0, 'E': 0},
        {'filename': 'doubletest.cocci', 'L': 2, 'W': 0, 'E': 0},
        {'filename': 'odd_ptr_err.cocci', 'L': 0, 'W': 0, 'E': 0},
        {'filename': 'unsigned_lesser_than_zero.cocci', 'L': 0, 'W': 0, 'E': 0},
    ]
}
In [14]:
# Minimized-tree counters as a DataFrame, columns in filename / L / W / E order
data3_minimized = DataFrame(report_linux_4_4_50_minimizedtree['statistics']).loc[:, ['filename', 'L', 'W', 'E']]
data3_minimized
Out[14]:
In [15]:
# Estimated safety relevance of each semantic patch, keyed by 'filename'.
# Each row below is (filename, rel, notes) and is expanded into a dict with
# exactly those keys.
my_safetyrel_cocci = {
    'info': {
        # TODO
    },
    # rel: Safety Relevance (1=Highest, 9=lowest)
    'values': [
        {'filename': filename, 'rel': rel, 'notes': notes}
        for filename, rel, notes in (
            ('alloc_cast.cocci', 5, ''),
            ('kzalloc-simple.cocci', 5, ''),
            ('pool_zalloc-simple.cocci', 8, ''),
            ('d_find_alias.cocci', 3, ''),
            ('debugfs_simple_attr.cocci', 6, ''),
            ('drm-get-put.cocci', 6, ''),
            ('err_cast.cocci', 5, ''),
            ('kstrdup.cocci', 4, ''),
            ('memdup.cocci', 4, ''),
            ('memdup_user.cocci', 4, ''),
            ('platform_no_drv_owner.cocci', 5, ''),
            ('pm_runtime.cocci', 4, ''),
            ('ptr_ret.cocci', 5, ''),
            ('resource_size.cocci', 4, ''),
            ('setup_timer.cocci', 6, ''),
            ('simple_open.cocci', 4, ''),
            ('vma_pages.cocci', 4, ''),
            ('clk_put.cocci', 4, ''),
            ('devm_free.cocci', 2, 'Beware of false positives'),
            ('ifnullfree.cocci', 7, ''),
            ('iounmap.cocci', 4, 'Beware of false positives'),
            ('kfree.cocci', 2, 'Beware of false positives'),
            ('kfreeaddr.cocci', 1, ''),
            ('pci_free_consistent.cocci', 4, ''),
            ('device_node_continue.cocci', 3, ''),
            ('fen.cocci', 5, ''),
            ('ifnull.cocci', 5, 'Beware of false positives'),
            ('list_entry_update.cocci', 4, ''),
            ('use_after_iter.cocci', 3, 'Beware of false positives'),
            ('call_kern.cocci', 4, ''),
            ('double_lock.cocci', 5, 'Beware of false positives'),
            ('flags.cocci', 2, ''),
            ('mini_lock.cocci', 6, 'Beware of false positives'),
            ('array_size.cocci', 4, ''),
            ('badty.cocci', 4, 'Beware of false positives'),
            ('boolconv.cocci', 9, 'Cleanup code only'),
            ('boolinit.cocci', 8, ''),
            ('boolreturn.cocci', 6, 'Does not affect code behaviour'),
            ('bugon.cocci', 8, ''),
            ('cond_no_effect.cocci', 4, 'Developed by Nicholas Mc Guire, OSADL'),
            ('cstptr.cocci', 1, ''),
            ('doubleinit.cocci', 7, 'High rate of false positives'),
            ('ifaddr.cocci', 1, ''),
            ('ifcol.cocci', 7, 'High rate of false positives'),
            ('irqf_oneshot.cocci', 6, ''),
            ('noderef.cocci', 2, ''),
            ('of_table.cocci', 2, ''),
            ('orplus.cocci', 4, 'Beware of false positives'),
            ('returnvar.cocci', 7, ''),
            ('semicolon.cocci', 8, ''),
            ('warn.cocci', 4, ''),
            ('badzero.cocci', 6, ''),
            ('deref_null.cocci', 4, 'Beware of false positives'),
            ('eno.cocci', 2, ''),
            ('kmerr.cocci', 2, ''),
            ('doublebitand.cocci', 7, 'Beware of false positives'),
            ('doubletest.cocci', 7, 'Beware of false positives'),
            ('odd_ptr_err.cocci', 4, ''),
            ('unsigned_lesser_than_zero.cocci', 3, ''),
            # TODO: Complete with all the 59 entries
        )
    ]
}
In [16]:
# Safety-relevance estimates as a DataFrame, columns in filename / rel / notes order
data5_relevance = DataFrame(my_safetyrel_cocci['values']).loc[:, ['filename', 'rel', 'notes']]
data5_relevance
Out[16]:
In [17]:
#df
In [18]:
# Reference: <https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.merge.html>
#
# See also tutorial: Merge and Join dataframes with Panda
# <https://www.shanelynn.ie/merge-join-dataframes-python-pandas-index-1/>
# Merge data5_relevance and df on filename
# Attach the catalogue columns (path, confidence, description) to every
# relevance row; rows are matched on the script filename.
result = data5_relevance.merge(df, on='filename')
# result
In [19]:
def _count_or_unknown(value):
    """Return ``value`` converted to ``int``, or ``'?'`` when it has no integer form."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        # NaN and non-numeric text raise ValueError, None raises TypeError and
        # +/-infinity raises OverflowError; all of them mean "no usable count".
        return '?'


def get_aggregate_LWE(dataframe, index):
    """Support function for df_append_aggregate_LWE.

    Build the 'L/W/E' summary string for one row of ``dataframe``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame providing the columns 'L', 'W' and 'E'.
    index
        Key of the row to summarise, looked up as ``dataframe[column][index]``.

    Returns
    -------
    str
        The three values joined by '/', each one rendered as an integer, or
        as '?' when it cannot be converted to one (e.g. NaN).
    """
    return "/".join(
        str(_count_or_unknown(dataframe[column][index]))
        for column in ('L', 'W', 'E')
    )
#int(data2_full['W'][1])
#print(get_aggregate_LWE(data2_full, 0))
#print(get_aggregate_LWE(data2_full, 3))
#data2_full.count()
def df_append_aggregate_LWE(dataframe, col_name):
    """Add column ``col_name`` to ``dataframe`` holding one 'L/W/E' string per row.

    Each string is produced by get_aggregate_LWE from the row's 'L', 'W' and
    'E' values. The frame is modified in place and nothing is returned.
    """
    dataframe[col_name] = [
        get_aggregate_LWE(dataframe, row_label) for row_label in dataframe.index
    ]
# Give each report its 'L/W/E' summary column: 'LWE_full' for the full-tree
# run and 'LWE_minimized' for the minimized-tree run.
df_append_aggregate_LWE(data2_full, "LWE_full")
df_append_aggregate_LWE(data3_minimized, "LWE_minimized")
# Join both reports onto `result` by script filename. Both reports carry raw
# 'L', 'W' and 'E' columns, so the second join renames the clashing ones with
# pandas' default '_x' / '_y' suffixes.
result = (
    result
    .merge(data2_full, on='filename')
    .merge(data3_minimized, on='filename')
)
#result
In [20]:
# Summary view: scripts ordered by safety relevance (1 = highest), limited to
# the columns wanted in the report ('description' is deliberately left out).
x = result.sort_values(by='rel').loc[:, [
    'path',
    'filename',
    'rel',
    'confidence',
    'LWE_full',
    'LWE_minimized',
    'notes',
]]
#type(x) # ==> pandas.core.frame.DataFrame
#x
In [21]:
# Render the prioritized summary table as HTML under a heading.
# See <https://stackoverflow.com/questions/25698448/how-to-embed-html-into-ipython-output>
# `display` and `HTML` are imported from the public IPython.display module;
# importing `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML('<h3>Result: Prioritized list of Coccinelle scripts</h3>'))
display(HTML(x.to_html()))
In [22]:
# Collect main results into an Excel spreadsheet, one sheet per table
# See <https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.to_excel.html>
# TODO: Trying to create file "output.xlsx" returns ModuleNotFoundError
# NOTE(review): the legacy '.xls' format needs an engine that recent pandas
# releases no longer ship support for - confirm it works with the installed
# pandas, or switch to '.xlsx' once its engine (see TODO above) is available.
#
# The context manager saves and closes the workbook on exit, also when one of
# the to_excel() calls raises; it replaces the explicit writer.save() call.
with pd.ExcelWriter('evaluate_sel_cocci.xls') as writer:
    x.to_excel(writer, sheet_name='Summary')
    df.to_excel(writer, sheet_name='Cocci scripts in linux 4.14-rc6')
    data5_relevance.to_excel(writer, sheet_name='Estimated SEL relevance')
    data2_full.to_excel(writer, sheet_name='report_linux_4_14_rc6_fulltree')
    data3_minimized.to_excel(writer, sheet_name='report_linux_4_4_50_minimized')