####################### # Config parameters ####################### only_known = 0 # only compare known names from idc/stubs. useful for debugging instead of searching for unnamed functions method_1 = 1 #turn on/off either of the 3 methods used to gather functions. use any combination of them :) method_2 = 1 method_3 = 1 method_4 = 1 method_5 = 1 verbose = 1 # print / don't print stuff in the console make_new_names = 1 # should MakeName be called? keep as 0 for testing purposes (to not actually write any function names) # for method_1: resplit = "[^_a-zA-Z'0-9\]\[\.\\\\]" # regex for splitting words minwlen = 2 # min word length optimlen = 25 # recommended length for function names namelog = open("name_funcs.log", "w") print "\nARM Function Naming Script - Written by A1ex" if not verbose: print "QUIET MODE ACTIVATED" print "Using methods:" print "Method 1 (functions named from strings) : ", bool(method_1) print "Method 2 (functions being simple jumps) : ", bool(method_2) print "Method 3 (functions decompiled on one-line): ", bool(method_3) print "Method 4 (functions named from source file): ", bool(method_4) def new_name(f, newname, d): actualname = newname if newname.startswith("str:"): actualname = newname[4:] if newname.startswith("dec:"): actualname = newname[4:] if newname.startswith("src:"): actualname = newname[4:] if newname.startswith("called_by:"): actualname = newname[10:] letters = filter(lambda x: x.lower() > 'a' and x.lower() < 'z', newname) if len(letters) < 5: return 0 if newname.startswith("sub_"): return 0 newname = newname[:optimlen*2] if newname in d.N2A: for i in range(2,100): if "%s_v%d" % (newname, i) not in d.N2A: newname = "%s_v%d" % (newname, i) break #~ newname = newname.replace("-", " if verbose: print hex(f), newname if make_new_names: d.MakeName(f, newname) return 1 def all_words(d): W = {} for a,s in d.STRINGS.iteritems(): for w in re.split(resplit, s): if len(w) >= minwlen: if w in W: W[w] += 1 else: W[w] = 1 W["this"] = 1e100 return W def weight(w): return 1.0 / W.get(w,1) + (1 if re.match("\[[a-zA-Z]+\]",w) else 0) + (-0.5 if w.endswith(".c") else 0) def filter_words(wds, thr): ans = [] for w in wds: if weight(w) > thr: ans.append(w) return ans def eval_thr(thr, wds): msg = string.join(filter_words(wds, thr)) return len(msg) < optimlen def find_thr(lo,hi,wds): #~ print lo,hi if hi - lo < 0.01: return lo m = (hi+lo)/2 too_short = eval_thr(m, wds) if too_short: return find_thr(lo,m,wds) else: return find_thr(m,hi,wds) def guess_name(F): refs = find_refs(F.dump, None, F.addr) wds = [] for a,v in refs: s = GuessString(F.dump, v) if s: for w in re.split(resplit,s): if len(w) >= minwlen and w not in wds: wds.append(w) thr = find_thr(0., 5., wds) msg = string.join(filter_words(wds, thr), "_") msg = msg.replace(" ", "_")[:optimlen*2] #~ for w in wds: #~ print (w,weight(w)), return msg if (method_1 == 1): def name_funcs_from_strings(d): k = 0 print >> namelog, "Naming functions from strings..." print >> namelog, "================================" for f in d.FUNCS: try: F = d.Fun(f) except: continue if bool(F.name.startswith("sub_")) != bool(only_known): newname = "str:" + guess_name(F) if newname: if verbose: print "%60s" % F.name, "<--->", newname k += new_name(f, newname, d) return k if (method_2 == 1): def name_funcs_j(d): print >> namelog, "Naming functions which jump to another function..." print >> namelog, "==================================================" k = 0 for f in d.FUNCS: try: F = d.Fun(f) except: continue if bool(F.name.startswith("sub_")) != bool(only_known): if F.size == 4 and GetMnef(f): try: a = str(GetOpnd(f,0)) a = int(a,16) calledfunc_name = d.Fun(a).name assert not calledfunc_name.startswith("sub_") newname = "j_" + d.Fun(a).name except: continue print >> namelog, hex(f), newname #ifdef verbrose_mode if verbose: print hex(f),F.name #endif k += new_name(f, newname, d) #~ print hex(f),F.name #not sure why this is here twice?? appers to be making duplicates. return k if (method_3 == 1): def name_funcs_oneliners(d): k = 0 A = [] print >> namelog, "Naming functions which fit on one line when decompiled..." print >> namelog, "=========================================================" for f in d.FUNCS: try: F = d.Fun(f) except: continue if bool(F.name.startswith("sub_")) != bool(only_known): if F.size <= 4*5: try: CP = emusym.find_code_paths(ea, timeout=1) assert len(CP) == 1 s = deco.P.doprint(deco.decompile(f, CP)).split("\n") except: continue if len(s) == 2: newname = s[0] newname = re.sub(" => ret_.*", "", newname) newname = newname.replace(" ", "_"); newname = "dec:" + newname print >> namelog, hex(f), newname k += new_name(f, newname, d) return k def name_funcs_srcfile(d): k = 0 A = [] print >> namelog, "Naming functions from their source file..." print >> namelog, "==========================================" for f in d.FUNCS: try: F = d.Fun(f) except: continue if bool(F.name.startswith("sub_")) != bool(only_known): try: s = d.SRCFILES[f] except: continue print s newname = "src:" + s k += new_name(f, newname, d) return k def name_funcs_leaf_from_callers(d): k = 0 A = [] print >> namelog, "Naming leaf functions from their callers..." print >> namelog, "===========================================" for f in d.FUNCS: try: F = d.Fun(f) except: continue if bool(F.name.startswith("sub_")) != bool(only_known): callsfrom = len(CodeRefsFrom(f)) if callsfrom == 0: print CodeRefsTo(f) callsto = list(set([GetFunctionName(x) for x in CodeRefsTo(f) if "sub_" not in GetFunctionName(x)])) if len(callsto): newname = "called_by:" + string.join(callsto, "_and_") k += new_name(f, newname, d) return k W = all_words(D[0]) if (method_1 == 1): a = name_funcs_from_strings(D[0]) else: a = 0 if (method_2 == 1): b = name_funcs_j(D[0]) else: b = 0 if (method_3 == 1): c = name_funcs_oneliners(D[0]) else: c = 0 if (method_4 == 1): d = name_funcs_srcfile(D[0]) else: d = 0 if (method_5 == 1): e = name_funcs_leaf_from_callers(D[0]) else: e = 0 namelog.close() ############################################## # print information about new functions named ############################################## def named_percentage(d): k = 0 A = [] named, unnamed = 0, 0 for f in d.FUNCS: try: F = d.Fun(f) except: continue if F.name.startswith("sub_"): unnamed += 1 else: named += 1 print "# Total Functions Named:", named, "\n# Total Functions:", named + unnamed, "\n# Percentage Named:", 100 * named / (named + unnamed), "%" print "################################################################" print "\n" print "################################################################" print "##################### FUNCTION STATISTICS ######################" print "################################################################" print "# Functions named from strings:", a, \ "\n# Functions which jump to another function:", b, \ "\n# Functions which fit on 1 line when decompiled:", c, \ "\n# Functions named from source file:", d, \ "\n# Functions 'leaf nodes' named from their callers:", e print "# Total New Functions Named:", a+b+c, "\n#" D[0].named_percentage()