#######################
# Config parameters
#######################
# When non-zero, only functions that already have a name (from idc/stubs) are
# processed, so guessed names can be compared against known ones while debugging.
# When zero, only unnamed (sub_*) functions are processed.
only_known = 0

# Switches for the five function-gathering methods; any combination may be enabled.
method_1 = 1  # name functions from the strings they reference
method_2 = 1  # name functions that simply jump to another function
method_3 = 1  # name functions that decompile to a single line
method_4 = 1  # name functions after their source file
method_5 = 1  # name leaf functions after their callers

verbose = 1         # 1: print progress in the console, 0: quiet mode
make_new_names = 1  # 1: call MakeName; keep 0 while testing so no names are written

# Settings for method_1:
resplit = r"[^_a-zA-Z'0-9\]\[\.\\]"  # regex matching the separators between words
minwlen = 2    # minimum word length
optimlen = 25  # recommended length for function names

namelog = open("name_funcs.log", "w")
print "\nARM Function Naming Script - Written by A1ex"
if not verbose: print "QUIET MODE ACTIVATED"
print "Using methods:"
print "Method 1 (functions named from strings)    : ", bool(method_1)
print "Method 2 (functions being simple jumps)    : ", bool(method_2)
print "Method 3 (functions decompiled on one-line): ", bool(method_3)
print "Method 4 (functions named from source file): ", bool(method_4)

def new_name(f, newname, d):
	actualname = newname
	if newname.startswith("str:"): actualname = newname[4:]
	if newname.startswith("dec:"): actualname = newname[4:]
	if newname.startswith("src:"): actualname = newname[4:]
	if newname.startswith("called_by:"): actualname = newname[10:]
	letters = filter(lambda x: x.lower() > 'a' and x.lower() < 'z', newname)
	if len(letters) < 5: return 0
	if newname.startswith("sub_"): return 0
	newname = newname[:optimlen*2]
	if newname in d.N2A:
		for i in range(2,100):
			if "%s_v%d" % (newname, i) not in d.N2A:
				newname = "%s_v%d" % (newname, i)
				break
	#~ newname = newname.replace("-", "
	if verbose: print hex(f), newname
	if make_new_names: d.MakeName(f, newname)
	return 1

def all_words(d):
	"""Count how often each word occurs across all strings in d.STRINGS.

	Strings are split with the resplit regex and words shorter than minwlen
	are skipped. Returns a dict mapping word -> occurrence count.
	"""
	counts = {}
	for text in d.STRINGS.itervalues():
		for word in re.split(resplit, text):
			if len(word) < minwlen:
				continue
			counts[word] = counts.get(word, 0) + 1
	# Give "this" a huge count so that its weight (1/count) is practically zero.
	counts["this"] = 1e100
	return counts

def weight(w):
	"""Score a word: the rarer it is in the global word table W, the higher.

	A word starting with a bracketed alphabetic tag such as "[abc]" gets a
	bonus of 1; a word ending in ".c" gets a penalty of 0.5.
	"""
	score = 1.0 / W.get(w, 1)  # words missing from W count as seen once
	if re.match(r"\[[a-zA-Z]+\]", w):
		score += 1
	if w.endswith(".c"):
		score += -0.5
	return score

def filter_words(wds, thr):
	"""Return the words of wds, in order, whose weight is strictly above thr."""
	return [word for word in wds if weight(word) > thr]

def eval_thr(thr, wds):
	"""Tell whether the words passing threshold thr give a message shorter than optimlen.

	The surviving words are joined with single spaces before measuring.
	"""
	kept = filter_words(wds, thr)
	return len(" ".join(kept)) < optimlen
	
def find_thr(lo,hi,wds):
	"""Bisect [lo, hi] for a weight threshold and return the lower bound found.

	Each step halves the interval: if the message at the midpoint is too
	short, the upper half is discarded (lower threshold -> more words kept),
	otherwise the lower half. Stops once the interval is narrower than 0.01.
	"""
	while hi - lo >= 0.01:
		mid = (hi+lo)/2
		if eval_thr(mid, wds):
			hi = mid
		else:
			lo = mid
	return lo

def guess_name(F):
	"""Build a candidate name for function F from the strings it references.

	Collects the distinct words (at least minwlen long) of every string that
	GuessString finds for the values returned by find_refs, keeps the words
	weighted above a threshold chosen by find_thr, and joins them with "_".
	The result is capped at 2*optimlen characters and may be empty.
	"""
	words = []
	for _, value in find_refs(F.dump, None, F.addr):
		text = GuessString(F.dump, value)
		if not text:
			continue
		for word in re.split(resplit, text):
			if len(word) < minwlen or word in words:
				continue
			words.append(word)

	threshold = find_thr(0., 5., words)
	joined = "_".join(filter_words(words, threshold))
	return joined.replace(" ", "_")[:optimlen*2]
	
if (method_1 == 1):	
	def name_funcs_from_strings(d):
		k = 0
		print >> namelog, "Naming functions from strings..."
		print >> namelog, "================================"
		for f in d.FUNCS:
			try: F = d.Fun(f)
			except: continue
			if bool(F.name.startswith("sub_")) != bool(only_known):
				newname = "str:" + guess_name(F)
				if newname:
					if verbose: print "%60s" % F.name, "<--->", newname
					k += new_name(f, newname, d)
		return k
		
		
if (method_2 == 1):
	def name_funcs_j(d):
		print >> namelog, "Naming functions which jump to another function..."
		print >> namelog, "=================================================="
		k = 0
		for f in d.FUNCS:
			try: F = d.Fun(f)
			except: continue
			if bool(F.name.startswith("sub_")) != bool(only_known):
				if F.size == 4 and GetMnef(f):
					try:
						a = str(GetOpnd(f,0))
						a = int(a,16)
						calledfunc_name = d.Fun(a).name
						assert not calledfunc_name.startswith("sub_")
						newname = "j_" + d.Fun(a).name
					except: continue

					print >> namelog, hex(f), newname
					#ifdef verbrose_mode
					if verbose: print hex(f),F.name
					#endif
					k += new_name(f, newname, d)
					#~ print hex(f),F.name		#not sure why this is here twice?? appers to be making duplicates.
		return k

if (method_3 == 1):
	def name_funcs_oneliners(d):
		"""Method 3: name short functions from their one-line decompilation.

		Considers selected functions of at most 20 bytes (F.size <= 4*5) with
		exactly one code path. When the decompiler output splits into exactly
		two lines, the first one (with any " => ret_..." tail removed and
		spaces turned into underscores) is proposed with a "dec:" prefix.
		Returns the number of names accepted by new_name.
		"""
		k = 0
		print >> namelog, "Naming functions which fit on one line when decompiled..."
		print >> namelog, "========================================================="
		for f in d.FUNCS:
			try: F = d.Fun(f)
			except Exception: continue
			if bool(F.name.startswith("sub_")) == bool(only_known):
				continue
			if F.size > 4*5:
				continue
			try:
				# Analyse the function being visited (f).
				CP = emusym.find_code_paths(f, timeout=1)
				if len(CP) != 1:
					continue
				s = deco.P.doprint(deco.decompile(f, CP)).split("\n")
			except Exception:
				# Best effort: skip functions the analysis cannot handle.
				continue
			if len(s) != 2:
				continue
			newname = re.sub(" => ret_.*", "", s[0])
			newname = "dec:" + newname.replace(" ", "_")
			print >> namelog, hex(f), newname
			k += new_name(f, newname, d)
		return k

def name_funcs_srcfile(d):
	k = 0
	A = []
	print >> namelog, "Naming functions from their source file..."
	print >> namelog, "=========================================="
	for f in d.FUNCS:
		try: F = d.Fun(f)
		except: continue
		if bool(F.name.startswith("sub_")) != bool(only_known):
			try: s = d.SRCFILES[f]
			except: continue
			print s
			newname = "src:" + s
			k += new_name(f, newname, d)
	return k

def name_funcs_leaf_from_callers(d):
	k = 0
	A = []
	print >> namelog, "Naming leaf functions from their callers..."
	print >> namelog, "==========================================="
	for f in d.FUNCS:
		try: F = d.Fun(f)
		except: continue
		if bool(F.name.startswith("sub_")) != bool(only_known):
			callsfrom = len(CodeRefsFrom(f))
			if callsfrom == 0:
				print CodeRefsTo(f)
				callsto = list(set([GetFunctionName(x) for x in CodeRefsTo(f) if "sub_" not in GetFunctionName(x)]))
				if len(callsto):
					newname = "called_by:" + string.join(callsto, "_and_")
					k += new_name(f, newname, d)
	return k

# Global word-frequency table read by weight(); it is built before any method runs.
W = all_words(D[0])

# Run every enabled method on D[0]; a disabled method counts as 0 functions named.
a = name_funcs_from_strings(D[0]) if method_1 == 1 else 0
b = name_funcs_j(D[0]) if method_2 == 1 else 0
c = name_funcs_oneliners(D[0]) if method_3 == 1 else 0
d = name_funcs_srcfile(D[0]) if method_4 == 1 else 0
e = name_funcs_leaf_from_callers(D[0]) if method_5 == 1 else 0

namelog.close()


##############################################
# print information about new functions named
##############################################
def named_percentage(d):
	k = 0
	A = []
	named, unnamed = 0, 0
	for f in d.FUNCS:
		try: F = d.Fun(f)
		except: continue
		if F.name.startswith("sub_"):
			unnamed += 1
		else:
			named += 1

	print "# Total Functions Named:", named, "\n# Total Functions:", named + unnamed, "\n# Percentage Named:", 100 * named / (named + unnamed), "%"
	print "################################################################"


print "\n"
print "################################################################"
print "##################### FUNCTION STATISTICS ######################"
print "################################################################"
print "# Functions named from strings:", a, \
	  "\n# Functions which jump to another function:", b, \
	  "\n# Functions which fit on 1 line when decompiled:", c, \
	  "\n# Functions named from source file:", d, \
	  "\n# Functions 'leaf nodes' named from their callers:", e
print "# Total New Functions Named:", a+b+c, "\n#"
D[0].named_percentage()