// Find files that define a class whose name contains "report" (case-insensitive)
// and that is coupled both to an org.zkoss.* class and to a java.sql.* class.
MATCH
(file:JavaFile)-[:DEFINES]->
(zk_coupled_class:JavaClass)
-[zk_couple:COUPLES]->
(zk_class:JavaClass)
WHERE
zk_class.name =~ '(?i)^org\\.zkoss.*' AND
zk_coupled_class.name =~ '(?i).*report.*'
WITH file, zk_coupled_class, zk_couple
// For the same file/class pairs, look for couplings to java.sql classes.
MATCH
(file:JavaFile)-[:DEFINES]->
(zk_coupled_class:JavaClass)
-[db_couple:COUPLES]->
(db_class:JavaClass)
WHERE db_class.name =~ '(?i)^java\\.sql.*'
RETURN DISTINCT file, zk_couple, db_couple
In [1]:
import sys
# add project root directory to python path to enable import of saapy
if ".." not in sys.path:
sys.path.append('..')
import keyring
from functools import partial
from neo4j.v1 import GraphDatabase, basic_auth
def neo4j_driver():
    """Create a Bolt driver for the Neo4j instance at ``bolt://localhost``.

    The password of the ``neo4j`` user is looked up in the system keyring
    under the ``test_neo4j`` service name, so it never appears in the code.
    """
    service, user = "test_neo4j", "neo4j"
    credentials = basic_auth(user, keyring.get_password(service, user))
    return GraphDatabase.driver("bolt://localhost", auth=credentials)
In [2]:
def run_query(tx, query, args):
    """Execute ``query`` with parameters ``args`` on ``tx`` and return the result.

    ``tx`` is the first parameter so that it can be pre-bound with
    ``functools.partial``, leaving a ``(query, args)`` callable.
    """
    outcome = tx.run(query, args)
    return outcome
class DryRunTx:
    """Stand-in transaction that records queries instead of executing them."""

    def __init__(self):
        # (query, args) tuples in the order they were submitted.
        self.runs = []

    def run(self, query, args):
        """Record the call and return an empty result set.

        An empty list (rather than ``None``) is returned so that batch jobs
        which iterate over the result, e.g. ``list(result_set)``, also work
        during a dry run.
        """
        self.runs.append((query, args))
        return []
def run_in_transaction(neo4j_driver, batch_job, dry=False):
    """Run ``batch_job`` inside a single Neo4j transaction.

    ``batch_job`` is called with one argument: a ``run(query, args)`` callable
    bound to the transaction.

    Args:
        neo4j_driver: driver used to open a session (unused when ``dry``).
        batch_job: callable taking the bound ``run`` function.
        dry: when true, nothing is sent to the database; queries are only
            recorded.

    Returns:
        In dry mode, the list of recorded ``(query, args)`` tuples.
        Otherwise whatever ``batch_job`` returned, or ``[]`` if it raised an
        ``Exception`` (the transaction is rolled back and the traceback is
        printed to stdout).
    """
    if dry:
        tx = DryRunTx()
        batch_job(partial(run_query, tx))
        return tx.runs

    neo4j_session = neo4j_driver.session()
    result_set = []
    try:
        # Begin inside the try block so the session is closed even when the
        # transaction cannot be started.
        tx = neo4j_session.begin_transaction()
        try:
            result_set = batch_job(partial(run_query, tx))
        except Exception:
            # Best effort: undo the batch, report the error, return [].
            tx.rollback()
            from traceback import print_exc
            print_exc(file=sys.stdout)
        except BaseException:
            # KeyboardInterrupt / SystemExit must not be swallowed: roll back
            # and let them propagate.
            tx.rollback()
            raise
        else:
            tx.commit()
    finally:
        neo4j_session.close()
    return result_set
In [4]:
# Module-level driver shared by the run_in_transaction calls below.
driver = neo4j_driver()
In [22]:
def find_reporting_debt(run_query):
    """Find "report" classes coupled to both ZK UI and JDBC classes.

    Args:
        run_query: callable ``(query, args)`` that executes a Cypher query
            and returns an iterable result.

    Returns:
        A list with the rows produced by the query; each row carries the
        defining ``file`` and the ``zk_couple`` / ``db_couple`` relationships.
    """
    query = """
MATCH
(file:JavaFile)-[:DEFINES]->
(zk_coupled_class:JavaClass)
-[zk_couple:COUPLES]->
(zk_class:JavaClass)
WHERE
zk_class.name =~ {zk_class_name_exp} AND
zk_coupled_class.name =~ {class_name_exp}
WITH file, zk_coupled_class, zk_couple
MATCH
(file:JavaFile)-[:DEFINES]->
(zk_coupled_class:JavaClass)
-[db_couple:COUPLES]->
(db_class:JavaClass)
WHERE db_class.name =~ {db_class_name_exp}
RETURN DISTINCT file, zk_couple, db_couple
"""
    # Raw strings: "\." is an invalid escape sequence in a normal string
    # literal and triggers a warning on current Python versions.
    zk_class_name_exp = r"(?i)^org\.zkoss.*"
    class_name_exp = r"(?i).*report.*"
    db_class_name_exp = r"(?i)^java\.sql.*"
    result_set = run_query(query, {
        "zk_class_name_exp": zk_class_name_exp,
        "db_class_name_exp": db_class_name_exp,
        "class_name_exp": class_name_exp,
    })
    return list(result_set)
In [23]:
# Runs against the live database (dry defaults to False).
result_set = run_in_transaction(driver, find_reporting_debt)
/**
 * Splits a string into tokens of consecutive characters that share the same
 * {@code Character.getType(char)} value.
 *
 * <p>When {@code camelCase} is true and a lowercase letter directly follows
 * an uppercase letter, that uppercase letter becomes the first character of
 * the lowercase token instead of the last character of the uppercase token.
 *
 * @param str the string to split, may be {@code null}
 * @param camelCase whether to apply the camel-case rule described above
 * @return {@code null} for {@code null} input,
 *         {@code ArrayUtils.EMPTY_STRING_ARRAY} for an empty string,
 *         otherwise the tokens in order of appearance
 */
private static String[] splitByCharacterType(final String str, final boolean camelCase) {
    if (str == null) {
        return null;
    }
    if (str.isEmpty()) {
        return ArrayUtils.EMPTY_STRING_ARRAY;
    }
    final char[] c = str.toCharArray();
    final List<String> list = new ArrayList<String>();
    // Start index of the token currently being accumulated.
    int tokenStart = 0;
    // Type of the previous character (c[pos - 1] inside the loop).
    int currentType = Character.getType(c[tokenStart]);
    for (int pos = tokenStart + 1; pos < c.length; pos++) {
        final int type = Character.getType(c[pos]);
        if (type == currentType) {
            // Same type as the previous character: the token continues.
            continue;
        }
        if (camelCase && type == Character.LOWERCASE_LETTER && currentType == Character.UPPERCASE_LETTER) {
            // Upper -> lower transition: the new token starts at the last uppercase char.
            final int newTokenStart = pos - 1;
            // Nothing to emit when the uppercase run was a single character.
            if (newTokenStart != tokenStart) {
                list.add(new String(c, tokenStart, newTokenStart - tokenStart));
                tokenStart = newTokenStart;
            }
        } else {
            // Any other type change closes the current token at pos.
            list.add(new String(c, tokenStart, pos - tokenStart));
            tokenStart = pos;
        }
        currentType = type;
    }
    // Emit the trailing token.
    list.add(new String(c, tokenStart, c.length - tokenStart));
    return list.toArray(new String[list.size()]);
}
StringUtils.splitByCharacterTypeCamelCase(null) = null
StringUtils.splitByCharacterTypeCamelCase("") = []
StringUtils.splitByCharacterTypeCamelCase("ab de fg") = ["ab", " ", "de", " ", "fg"]
StringUtils.splitByCharacterTypeCamelCase("ab   de fg") = ["ab", "   ", "de", " ", "fg"]
StringUtils.splitByCharacterTypeCamelCase("ab:cd:ef") = ["ab", ":", "cd", ":", "ef"]
StringUtils.splitByCharacterTypeCamelCase("number5") = ["number", "5"]
StringUtils.splitByCharacterTypeCamelCase("fooBar") = ["foo", "Bar"]
StringUtils.splitByCharacterTypeCamelCase("foo200Bar") = ["foo", "200", "Bar"]
StringUtils.splitByCharacterTypeCamelCase("ASFRules") = ["ASF", "Rules"]
In [54]:
def split_by_character_type(s, camel=True):
    """Split ``s`` into runs of characters of the same type.

    Two adjacent characters stay in the same token when both are lowercase,
    both are uppercase, both are digits, both are non-alphanumeric, or both
    have the same Unicode general category. The last rule keeps runs of
    caseless letters (e.g. CJK) together instead of emitting one token per
    character.

    Args:
        s: the string to split; ``None`` and ``""`` both yield ``[]``.
        camel: when true, an uppercase letter directly followed by a
            lowercase letter starts the lowercase token
            (``"ASFRules"`` -> ``["ASF", "Rules"]``).

    Returns:
        The list of tokens in order of appearance.
    """
    import unicodedata

    if not s:
        return []

    def same_type(cur, prev):
        # All non-alphanumeric characters are deliberately treated as one class.
        return ((cur.islower() and prev.islower()) or
                (cur.isupper() and prev.isupper()) or
                (cur.isdigit() and prev.isdigit()) or
                (not cur.isalnum() and not prev.isalnum()) or
                unicodedata.category(cur) == unicodedata.category(prev))

    parts = []
    token_start = 0
    for pos in range(1, len(s)):
        cur, prev = s[pos], s[pos - 1]
        if same_type(cur, prev):
            continue
        if camel and cur.islower() and prev.isupper():
            # Upper -> lower transition: the new token starts at the last
            # uppercase character, unless it is already the token start.
            new_token_start = pos - 1
            if new_token_start != token_start:
                parts.append(s[token_start:new_token_start])
                token_start = new_token_start
        else:
            parts.append(s[token_start:pos])
            token_start = pos
    parts.append(s[token_start:])
    return parts
# Sample inputs matching the Javadoc examples for splitByCharacterTypeCamelCase.
ss = [None, "", "ab de fg", "ab de fg", "ab:cd:ef", "number5", "fooBar", "foo200Bar", "ASFRules"]
for s in ss:
    print(split_by_character_type(s))
In [55]:
def find_class_names(run_query):
    """Fetch the name of every ``JavaClass`` node.

    Args:
        run_query: callable ``(query, args)`` returning an iterable result.

    Returns:
        A list of result rows, each exposing the name under ``class_name``.
    """
    cypher = """
MATCH (class:JavaClass)
RETURN class.name as class_name
"""
    no_params = {}
    return list(run_query(cypher, no_params))
In [56]:
# Load all class names from the live database (dry defaults to False).
class_name_result_set = run_in_transaction(driver, find_class_names)
In [60]:
# Peek at the class name in the first returned row.
class_name_result_set[0]["class_name"]
Out[60]:
In [61]:
# Try the splitter on the first class name (still the full dotted name here).
split_by_character_type(class_name_result_set[0]["class_name"])
Out[61]:
In [62]:
s = class_name_result_set[0]["class_name"]
# Split once on the last dot: everything before it vs. the final component.
s.rsplit('.', 1)
Out[62]:
In [65]:
from collections import namedtuple
# One record per class: full dotted name, package part, simple class name,
# and the simple name split into parts by split_by_character_type.
ClassName = namedtuple("ClassName", ["long_name", "package_name", "class_name", "name_parts"])
class_names = []
for r in class_name_result_set:
    long_name = r["class_name"]
    # Split on the last dot only: [package, simple_name], or [simple_name]
    # when the name contains no dot.
    package_class = long_name.rsplit('.', 1)
    name_parts = split_by_character_type(package_class[-1])
    class_names.append(ClassName(
        long_name=long_name,
        # Empty package when the name had no dot.
        package_name=package_class[0] if len(package_class) == 2 else "",
        class_name=package_class[-1],
        name_parts=name_parts
    ))
In [75]:
# Distinct name parts across all classes (case-sensitive).
terms = set()
for cn in class_names:
    terms.update(cn.name_parts)
    # terms.add(cn.name_parts[-1])
In [86]:
from itertools import chain
from collections import Counter
# Frequency of each case-folded name part across all class names.
counter = Counter(
    part.casefold()
    for part in chain.from_iterable(entry.name_parts for entry in class_names)
)
In [88]:
# Number of name parts equal to "report" after case folding.
counter["report"]
Out[88]:
In [89]:
def build_class_name_vocabulary(class_names, run_query):
    """Link every class to the lexems that make up its simple name.

    One query is issued per name part. It looks up the ``JavaClass`` by its
    long name, merges a ``Lexem`` node for the part, and creates a
    ``CONTAINS`` relationship carrying the part's position as ``part_order``.

    NOTE: the relationship is made with ``CREATE``, so running the job again
    adds further ``CONTAINS`` relationships.

    Args:
        class_names: iterable of records with ``long_name`` and ``name_parts``.
        run_query: callable ``(query, args)`` executing a Cypher query.
    """
    link_query = """
MATCH (class: JavaClass {name: {long_name}})
MERGE (l:Lexem {name: {name_part}})
CREATE (class)-[:CONTAINS {part_order: {part_order}}]->(l)
"""
    for entry in class_names:
        for position, lexem in enumerate(entry.name_parts):
            params = {
                "long_name": entry.long_name,
                "name_part": lexem,
                "part_order": position,
            }
            run_query(link_query, params)
In [93]:
# Pre-bind class_names so the job takes only the run function.
batch_job = partial(build_class_name_vocabulary, class_names)
# dry=True: nothing is written; result is the list of recorded (query, args) pairs.
result = run_in_transaction(driver, batch_job, dry=True)
// Classes in org.adempiere / org.compiere / org.idempiere packages that
// contain a lexem starting with 'Report'; returns the CONTAINS relationships.
MATCH (class:JavaClass)<-[:CONTAINS]-(package:JavaPackage)
WHERE
package.name STARTS WITH 'org.adempiere' OR
package.name STARTS WITH 'org.compiere' OR
package.name STARTS WITH 'org.idempiere'
WITH class
MATCH (class)-[c:CONTAINS]->(l:Lexem)
WHERE l.name STARTS WITH 'Report'
RETURN c
// Same class selection as the previous query, then keep only pairs where the
// coupling goes in both directions.
MATCH (class:JavaClass)<-[:CONTAINS]-(package:JavaPackage)
WHERE
package.name STARTS WITH 'org.adempiere' OR
package.name STARTS WITH 'org.compiere' OR
package.name STARTS WITH 'org.idempiere'
WITH class
MATCH (class)-[c:CONTAINS]->(l:Lexem)
WHERE l.name STARTS WITH 'Report'
WITH class
MATCH (class)-[cp:COUPLES]->(coupled_class:JavaClass)
// cp is not carried through this WITH, so the cp bound below is a new variable.
WITH class, coupled_class
// The returned cp is the relationship from coupled_class back to class.
MATCH (coupled_class)-[cp:COUPLES]->(class)
RETURN cp