// Find Java files that define a class whose name contains "report"
// (case-insensitive) and that is coupled both to a class under org.zkoss
// and to a class under java.sql.
MATCH 
    (file:JavaFile)-[:DEFINES]->
    (zk_coupled_class:JavaClass)
    -[zk_couple:COUPLES]->
    (zk_class:JavaClass) 
WHERE 
    zk_class.name =~ '(?i)^org\\.zkoss.*' AND 
    zk_coupled_class.name =~ '(?i).*report.*'
// Carry the matched file/class forward so the second pattern is evaluated
// against the same class that couples to org.zkoss.
WITH file, zk_coupled_class, zk_couple
MATCH 
    (file:JavaFile)-[:DEFINES]->
    (zk_coupled_class:JavaClass)
    -[db_couple:COUPLES]->
    (db_class:JavaClass) 
WHERE db_class.name =~ '(?i)^java\\.sql.*' 
// One row per distinct (file, zkoss coupling, java.sql coupling) combination.
RETURN DISTINCT file, zk_couple, db_couple

In [1]:
import sys

# add project root directory to python path to enable import of saapy
if ".." not in sys.path:
    sys.path.append('..')
    
import keyring
from functools import partial
from neo4j.v1 import GraphDatabase, basic_auth


def neo4j_driver():
    """Build a Bolt driver for the Neo4j instance at bolt://localhost.

    The password of user "neo4j" is read from the system keyring under the
    service name "test_neo4j", so no credential is stored in the notebook.

    Returns:
        The object returned by ``GraphDatabase.driver``.
    """
    service, user = "test_neo4j", "neo4j"
    credentials = basic_auth(user, keyring.get_password(service, user))
    return GraphDatabase.driver("bolt://localhost", auth=credentials)

In [2]:
def run_query(tx, query, args):
    """Run ``query`` with parameters ``args`` on ``tx`` and return its result.

    ``tx`` is any object exposing ``run(query, args)``; the function exists so
    that it can be bound to a transaction with ``functools.partial``.
    """
    outcome = tx.run(query, args)
    return outcome

class DryRunTx:
    """Stand-in for a transaction that records queries instead of running them.

    Every call to ``run`` is appended to ``self.runs`` as a ``(query, args)``
    tuple, in call order.
    """

    def __init__(self):
        # Recorded (query, args) pairs, oldest first.
        self.runs = []

    def run(self, query, args):
        """Record the query and return an empty result.

        An empty list (rather than ``None``) is returned so that batch jobs
        which iterate over the result, e.g. ``list(run_query(...))``, also
        work in dry-run mode.
        """
        self.runs.append((query, args))
        return []
        
def run_in_transaction(neo4j_driver, batch_job, dry=False):
    """Run ``batch_job`` inside a single transaction, or dry-run it.

    Args:
        neo4j_driver: object with a ``session()`` method; not used when
            ``dry`` is true.
        batch_job: callable taking one argument, a ``run_query(query, args)``
            function bound to the current transaction.
        dry: when true, nothing is sent to the database; queries are recorded
            by a ``DryRunTx`` instead.

    Returns:
        In dry mode, the list of recorded ``(query, args)`` tuples.
        Otherwise whatever ``batch_job`` returned, or ``[]`` if it raised an
        ``Exception`` (the transaction is rolled back and the traceback is
        printed to stdout).

    Raises:
        Any error raised while opening, committing or rolling back the
        transaction. These are no longer swallowed, so a failed commit cannot
        be mistaken for success. ``KeyboardInterrupt``/``SystemExit`` also
        propagate.
    """
    if dry:
        tx = DryRunTx()
        batch_job(partial(run_query, tx))
        return tx.runs

    from traceback import print_exc

    result_set = []
    neo4j_session = neo4j_driver.session()
    try:
        # Opened inside the try so the session is closed even if this fails.
        tx = neo4j_session.begin_transaction()
        try:
            result_set = batch_job(partial(run_query, tx))
        except Exception:
            # Best-effort behaviour kept from the original: report the failure
            # and hand back an empty result instead of raising.
            tx.rollback()
            print_exc(file=sys.stdout)
        else:
            tx.commit()
    finally:
        # No `return` here: returning from `finally` would discard any
        # exception raised by commit() or rollback().
        neo4j_session.close()
    return result_set

In [4]:
# Driver instance shared by the run_in_transaction calls in this notebook.
driver = neo4j_driver()

In [22]:
def find_reporting_debt(run_query,
                        zk_class_name_exp=r"(?i)^org\.zkoss.*",
                        class_name_exp=r"(?i).*report.*",
                        db_class_name_exp=r"(?i)^java\.sql.*"):
    """Find classes coupled to both a UI-toolkit class and a database class.

    Returns the files defining a class whose name matches ``class_name_exp``
    and which has a COUPLES relationship both to a class matching
    ``zk_class_name_exp`` and to a class matching ``db_class_name_exp``.

    Args:
        run_query: callable ``run_query(query, args)`` returning an iterable
            of result records.
        zk_class_name_exp: regular expression for the first coupled class
            name; defaults to names starting with ``org.zkoss``.
        class_name_exp: regular expression for the coupling class name;
            defaults to names containing ``report``.
        db_class_name_exp: regular expression for the second coupled class
            name; defaults to names starting with ``java.sql``.

    Returns:
        A list with the records produced by ``run_query``; each row exposes
        ``file``, ``zk_couple`` and ``db_couple``.
    """
    # NOTE(review): `{name}` is the older Cypher parameter syntax; newer Neo4j
    # releases expect `$name`. Confirm against the server version in use.
    query = """
    MATCH 
        (file:JavaFile)-[:DEFINES]->
        (zk_coupled_class:JavaClass)
        -[zk_couple:COUPLES]->
        (zk_class:JavaClass) 
    WHERE 
        zk_class.name =~ {zk_class_name_exp} AND 
        zk_coupled_class.name =~ {class_name_exp}
    WITH file, zk_coupled_class, zk_couple
    MATCH 
        (file:JavaFile)-[:DEFINES]->
        (zk_coupled_class:JavaClass)
        -[db_couple:COUPLES]->
        (db_class:JavaClass) 
    WHERE db_class.name =~ {db_class_name_exp} 
    RETURN DISTINCT file, zk_couple, db_couple
    """
    result_set = run_query(query, {
        "zk_class_name_exp": zk_class_name_exp,
        "db_class_name_exp": db_class_name_exp,
        "class_name_exp": class_name_exp,
    })
    return list(result_set)

In [23]:
# Run the reporting-debt query against the database (dry defaults to False).
result_set = run_in_transaction(driver, find_reporting_debt)
/**
 * Splits a string into runs of characters sharing the same
 * {@code Character.getType(char)} value.
 *
 * <p>With {@code camelCase} set, an uppercase letter that is immediately
 * followed by a lowercase letter starts the lowercase token instead of
 * ending the preceding uppercase run.</p>
 *
 * @param str the string to split, may be {@code null}
 * @param camelCase whether to apply the camel-case rule described above
 * @return the tokens in order; {@code null} for {@code null} input and
 *         {@code ArrayUtils.EMPTY_STRING_ARRAY} for an empty string
 */
private static String[] splitByCharacterType(final String str, final boolean camelCase) {
        if (str == null) {
            return null;
        }
        if (str.isEmpty()) {
            return ArrayUtils.EMPTY_STRING_ARRAY;
        }
        final char[] chars = str.toCharArray();
        final List<String> tokens = new ArrayList<String>();
        int start = 0;
        int runType = Character.getType(chars[0]);
        int index = 1;
        while (index < chars.length) {
            final int charType = Character.getType(chars[index]);
            if (charType != runType) {
                // An upper-to-lower transition in camel-case mode keeps the
                // last uppercase character with the lowercase run.
                final boolean hump = camelCase
                        && runType == Character.UPPERCASE_LETTER
                        && charType == Character.LOWERCASE_LETTER;
                if (!hump) {
                    tokens.add(new String(chars, start, index - start));
                    start = index;
                } else if (index - 1 != start) {
                    final int humpStart = index - 1;
                    tokens.add(new String(chars, start, humpStart - start));
                    start = humpStart;
                }
                runType = charType;
            }
            index++;
        }
        // Flush the final run.
        tokens.add(new String(chars, start, chars.length - start));
        return tokens.toArray(new String[tokens.size()]);
}

StringUtils.splitByCharacterTypeCamelCase(null)         = null
StringUtils.splitByCharacterTypeCamelCase("")           = []
StringUtils.splitByCharacterTypeCamelCase("ab de fg")   = ["ab", " ", "de", " ", "fg"]
StringUtils.splitByCharacterTypeCamelCase("ab   de fg") = ["ab", "   ", "de", " ", "fg"]
StringUtils.splitByCharacterTypeCamelCase("ab:cd:ef")   = ["ab", ":", "cd", ":", "ef"]
StringUtils.splitByCharacterTypeCamelCase("number5")    = ["number", "5"]
StringUtils.splitByCharacterTypeCamelCase("fooBar")     = ["foo", "Bar"]
StringUtils.splitByCharacterTypeCamelCase("foo200Bar")  = ["foo", "200", "Bar"]
StringUtils.splitByCharacterTypeCamelCase("ASFRules")   = ["ASF", "Rules"]

In [54]:
def split_by_character_type(s, camel=True):
    """Split a string into runs of same-type characters.

    Character types are: lowercase, uppercase, digit, other alphanumeric
    (letters or numbers that are neither cased nor digits, e.g. CJK letters)
    and non-alphanumeric. Adjacent characters of the same type stay in one
    token.

    Args:
        s: the string to split; ``None`` and ``""`` both give ``[]``.
        camel: when true, an uppercase letter directly followed by a
            lowercase letter starts the lowercase token ("ASFRules" ->
            ["ASF", "Rules"]). When false, every type change is a boundary
            ("fooBar" -> ["foo", "B", "ar"]).

    Returns:
        The list of tokens in order; joined together they reproduce ``s``.
    """
    if not s:
        return []

    def uncased_alnum(ch):
        # Alphanumeric characters that none of the cased/digit tests claim.
        return ch.isalnum() and not (ch.islower() or ch.isupper() or ch.isdigit())

    def same_type(prev, cur):
        return ((cur.islower() and prev.islower()) or
                (cur.isupper() and prev.isupper()) or
                (cur.isdigit() and prev.isdigit()) or
                # Without this case each uncased letter became its own token.
                (uncased_alnum(cur) and uncased_alnum(prev)) or
                (not cur.isalnum() and not prev.isalnum()))

    parts = []
    token_start = 0
    for pos in range(1, len(s)):
        prev, cur = s[pos - 1], s[pos]
        if same_type(prev, cur):
            continue
        if camel and cur.islower() and prev.isupper():
            # Keep the last uppercase letter with the lowercase run.
            new_token_start = pos - 1
            if new_token_start != token_start:
                parts.append(s[token_start:new_token_start])
                token_start = new_token_start
        else:
            parts.append(s[token_start:pos])
            token_start = pos
    parts.append(s[token_start:])
    return parts
    
# Sample inputs for a quick check of split_by_character_type; None and ""
# exercise the empty-input guard.
ss = [None, "", "ab de fg", "ab   de fg", "ab:cd:ef", "number5", "fooBar", "foo200Bar", "ASFRules"]

# camel is left at its default (True).
for s in ss:
    print(split_by_character_type(s))


[]
[]
['ab', ' ', 'de', ' ', 'fg']
['ab', '   ', 'de', ' ', 'fg']
['ab', ':', 'cd', ':', 'ef']
['number', '5']
['foo', 'Bar']
['foo', '200', 'Bar']
['ASF', 'Rules']

In [55]:
def find_class_names(run_query):
    """Fetch the name of every JavaClass node.

    Args:
        run_query: callable ``run_query(query, args)`` returning an iterable
            of result records.

    Returns:
        A list of the records yielded by ``run_query``; each record exposes
        the class name under the key ``class_name``.
    """
    cypher = """
    MATCH (class:JavaClass)
    RETURN class.name as class_name
    """
    no_parameters = {}
    return [record for record in run_query(cypher, no_parameters)]

In [56]:
# Load all class names from the database; each record is indexed by "class_name".
class_name_result_set = run_in_transaction(driver, find_class_names)

In [60]:
class_name_result_set[0]["class_name"]


Out[60]:
'org.compiere.grid.DefaultPaymentFormFactory'

In [61]:
split_by_character_type(class_name_result_set[0]["class_name"])


Out[61]:
['org',
 '.',
 'compiere',
 '.',
 'grid',
 '.',
 'Default',
 'Payment',
 'Form',
 'Factory']

In [62]:
s = class_name_result_set[0]["class_name"]

s.rsplit('.', 1)


Out[62]:
['org.compiere.grid', 'DefaultPaymentFormFactory']

In [65]:
from collections import namedtuple

# long_name: fully qualified name as stored in the graph
# package_name: text before the last '.', or "" when there is no '.'
# class_name: text after the last '.'
# name_parts: class_name split by split_by_character_type
ClassName = namedtuple("ClassName", ["long_name", "package_name", "class_name", "name_parts"])

class_names = []
for r in class_name_result_set:
    long_name = r["class_name"]
    # rsplit with maxsplit=1 yields [package, class], or [name] if there is no dot.
    package_class = long_name.rsplit('.', 1)
    # Only the simple class name is tokenized, not the package path.
    name_parts = split_by_character_type(package_class[-1])
    class_names.append(ClassName(
            long_name=long_name,
            package_name=package_class[0] if len(package_class) == 2 else "",
            class_name=package_class[-1],
            name_parts=name_parts
        ))

In [75]:
# Distinct name parts across all classes (case-sensitive, since the parts
# are added unchanged).
terms = set()
for cn in class_names:
    terms.update(cn.name_parts)
    # terms.add(cn.name_parts[-1])

In [86]:
from itertools import chain
from collections import Counter

# Count how often each name part occurs over all classes; parts are
# case-folded first, so "Report" and "report" share one counter key.
counter = Counter(chain.from_iterable([[np.casefold() for np in cn.name_parts] for cn in class_names]))

In [88]:
counter["report"]


Out[88]:
70

In [89]:
def build_class_name_vocabulary(class_names, run_query):
    """Link each JavaClass node to Lexem nodes for the parts of its name.

    For every name part of every class one query is issued that looks up the
    class by its long name, merges a ``Lexem`` node named after the part and
    merges a ``CONTAINS`` relationship carrying the part's position.

    Args:
        class_names: iterable of objects with ``long_name`` and
            ``name_parts`` attributes.
        run_query: callable ``run_query(query, args)``; its return value is
            ignored.

    Returns:
        None.
    """
    # The query text is the same for every part, so build it once.
    # MERGE (not CREATE) on the relationship keeps a re-run of this job from
    # adding duplicate CONTAINS edges for the same class, lexem and position.
    query = """
            MATCH (class: JavaClass {name: {long_name}})
            MERGE (l:Lexem {name: {name_part}})
            MERGE (class)-[:CONTAINS {part_order: {part_order}}]->(l)
            """
    for cn in class_names:
        for part_order, name_part in enumerate(cn.name_parts):
            run_query(query, {
                "long_name": cn.long_name,
                "name_part": name_part,
                "part_order": part_order,
            })

In [93]:
# Bind class_names so the job only needs the run_query argument.
batch_job = partial(build_class_name_vocabulary, class_names)
# dry=True: the queries are recorded by a DryRunTx and returned as a list of
# (query, args) tuples; the driver is not used.
result = run_in_transaction(driver, batch_job, dry=True)
// CONTAINS relationships from classes in org.adempiere / org.compiere /
// org.idempiere packages to lexems whose name starts with 'Report'.
MATCH (class:JavaClass)<-[:CONTAINS]-(package:JavaPackage)
WHERE 
    package.name STARTS WITH 'org.adempiere' OR
    package.name STARTS WITH 'org.compiere' OR
    package.name STARTS WITH 'org.idempiere'
WITH class
// STARTS WITH is case-sensitive, so only capitalized 'Report...' lexems match.
MATCH (class)-[c:CONTAINS]->(l:Lexem)
WHERE l.name STARTS WITH 'Report'
RETURN c
// For classes in org.adempiere / org.compiere / org.idempiere packages that
// have a lexem starting with 'Report': return the COUPLES relationships that
// point back to such a class from a class it couples to (mutual coupling).
MATCH (class:JavaClass)<-[:CONTAINS]-(package:JavaPackage)
WHERE 
    package.name STARTS WITH 'org.adempiere' OR
    package.name STARTS WITH 'org.compiere' OR
    package.name STARTS WITH 'org.idempiere'
WITH class
MATCH (class)-[:CONTAINS]->(l:Lexem)
WHERE l.name STARTS WITH 'Report'
// DISTINCT: a class with several matching lexems must not multiply the rows.
WITH DISTINCT class
MATCH (class)-[:COUPLES]->(coupled_class:JavaClass)
// DISTINCT: several COUPLES edges between the same pair count once.
WITH DISTINCT class, coupled_class
MATCH (coupled_class)-[cp:COUPLES]->(class)
RETURN cp