Skip to content

Commit 61adbe3

Browse files
committed
Add a bit nicer tracing
1 parent f335080 commit 61adbe3

File tree

3 files changed

+101
-11
lines changed

3 files changed

+101
-11
lines changed

setup.cfg

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ install_requires =
2525
#minidump ~=0.0.21 # this library has a bug, is vendored locally
2626
unicorn ~=1.0.3
2727
pefile >=2021.9.3
28+
capstone ~=4.0.2
2829

2930
[options.packages.find]
3031
where = src

src/dumpulator/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
from .dumpulator import *
1+
from .dumpulator import Dumpulator
22
from .syscalls import syscall

src/dumpulator/dumpulator.py

+99-10
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from pefile import *
77
import inspect
88
from .native import *
9+
from capstone import *
910

1011
syscall_functions = {}
1112

@@ -299,6 +300,7 @@ def __init__(self, uc: Uc, x64):
299300
"mxcsr": UC_X86_REG_MXCSR,
300301
"fs_base": UC_X86_REG_FS_BASE,
301302
"gs_base": UC_X86_REG_GS_BASE,
303+
"rflags": UC_X86_REG_EFLAGS,
302304
}
303305
if self._x64:
304306
self._regmap.update({
@@ -384,18 +386,32 @@ class Dumpulator:
384386
def __init__(self, minidump_file, trace=False):
    """Load a minidump and prepare the emulator state for it.

    Args:
        minidump_file: Path of the minidump to parse. When ``trace`` is
            truthy the trace is written to this path with ``.trace``
            appended.
        trace: When truthy, open the trace file; ``_setup_emulator`` then
            checks ``self.trace`` to decide whether to install the
            per-instruction code hook.
    """
    self._minidump = MinidumpFile.parse(minidump_file)
    # The dump is treated as 64-bit unless the first thread carries a WOW64 context.
    self._x64 = type(self._minidump.threads.threads[0].ContextObject) is not WOW64_CONTEXT

    # NOTE(review): nothing visible here closes this handle -- confirm that
    # the owner of the Dumpulator instance is expected to close it.
    self.trace = open(minidump_file + ".trace", "w") if trace else None

    # Module containing the most recently traced instruction (None until known).
    self.last_module: Optional[MinidumpModule] = None

    # Emulator engine.
    uc_mode = UC_MODE_64 if self._x64 else UC_MODE_32
    self._uc = Uc(UC_ARCH_X86, uc_mode)

    # Disassembler; detail mode is enabled because the trace code inspects
    # operands and implicit register reads/writes.
    cs_mode = CS_MODE_64 if self._x64 else CS_MODE_32
    self.cs = Cs(CS_ARCH_X86, cs_mode)
    self.cs.detail = True

    self.regs = Registers(self._uc, self._x64)
    self.args = Arguments(self._uc, self.regs, self._x64)

    self._allocate_base = None
    self._allocate_size = 0x10000
    self._allocate_ptr = None

    self._setup_emulator()

    self.exit_code = None
    self.syscalls = []
    self._setup_syscalls()

    # Maps absolute export address -> "module:name" label.
    self.exports = self._setup_exports()
414+
399415

400416
# Source: https://github.com/mandiant/speakeasy/blob/767edd2272510a5badbab89c5f35d43a94041378/speakeasy/windows/winemu.py#L533
401417
def _setup_gdt(self, teb_addr):
@@ -480,17 +496,18 @@ def _create_selector(index, flags):
480496
selector = _create_selector(15, GDT_FLAGS.Ring3)
481497
self.regs.gs = selector
482498

483-
def _setup_emulator(self, trace):
499+
def _setup_emulator(self):
484500
# set up hooks
485501
self._uc.hook_add(UC_HOOK_MEM_READ_UNMAPPED | UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_FETCH_UNMAPPED | UC_HOOK_MEM_READ_PROT | UC_HOOK_MEM_WRITE_PROT | UC_HOOK_MEM_FETCH_PROT, _hook_mem, user_data=self)
486502
#self._uc.hook_add(UC_HOOK_MEM_FETCH_UNMAPPED, _hook_mem, user_data=self)
487-
if trace:
503+
if self.trace:
488504
self._uc.hook_add(UC_HOOK_CODE, _hook_code, user_data=self)
489505
#self._uc.hook_add(UC_HOOK_MEM_READ_INVALID, self._hook_mem, user_data=None)
490506
#self._uc.hook_add(UC_HOOK_MEM_WRITE_INVALID, self._hook_mem, user_data=None)
491507
self._uc.hook_add(UC_HOOK_INSN, _hook_syscall, user_data=self, arg1=UC_X86_INS_SYSCALL)
492508
self._uc.hook_add(UC_HOOK_INSN, _hook_syscall, user_data=self, arg1=UC_X86_INS_SYSENTER)
493509
self._uc.hook_add(UC_HOOK_INTR, _hook_interrupt, user_data=self)
510+
self._uc.hook_add(UC_HOOK_INSN_INVALID, _hook_invalid, user_data=self)
494511

495512
# map in codecave
496513
self._uc.mem_map(CAVE_ADDR, CAVE_SIZE)
@@ -562,6 +579,19 @@ def _setup_emulator(self, trace):
562579

563580
self._setup_gdt(thread.Teb)
564581

582+
def _setup_exports(self):
    """Build a lookup table of exported symbols for every module in the dump.

    Prints one summary line per module (name, base address, size).

    Returns:
        dict mapping ``module.baseaddress + export.address`` to a label of
        the form ``"<module file name, lowercased>:<export name>"``. Exports
        without a name are labelled ``#<ordinal>``.
    """
    address_to_label = {}
    for mod in self._minidump.modules.modules:
        # Keep only the file name portion of the module path.
        short_name = mod.name.split('\\')[-1].lower()
        print(f"{short_name} 0x{mod.baseaddress:x}[0x{mod.size:x}]")
        for symbol in self._parse_module_exports(mod):
            label = symbol.name.decode("utf-8") if symbol.name else f"#{symbol.ordinal}"
            address_to_label[mod.baseaddress + symbol.address] = f"{short_name}:{label}"
    return address_to_label
594+
565595
def _find_module(self, name) -> MinidumpModule:
566596
module: MinidumpModule
567597
for module in self._minidump.modules.modules:
@@ -570,20 +600,34 @@ def _find_module(self, name) -> MinidumpModule:
570600
return module
571601
raise Exception(f"Module '{name}' not found")
572602

573-
def _setup_syscalls(self):
574-
# Load the ntdll module from memory
575-
ntdll = self._find_module("ntdll.dll")
576-
ntdll_data = self.read(ntdll.baseaddress, ntdll.size)
577-
pe = PE(data=ntdll_data, fast_load=True)
603+
def find_module_by_addr(self, address) -> Optional[MinidumpModule]:
    """Return the first module whose address range contains ``address``.

    A module covers the half-open range
    ``[baseaddress, baseaddress + size)``. Returns ``None`` when no module
    in the dump contains the address.
    """
    containing = (
        mod
        for mod in self._minidump.modules.modules
        if mod.baseaddress <= address < mod.baseaddress + mod.size
    )
    return next(containing, None)
609+
610+
def _parse_module_exports(self, module):
    """Parse the export directory of a module that is mapped in the emulator.

    Args:
        module: Module descriptor providing ``baseaddress``, ``size`` and
            ``name``.

    Returns:
        The list of export symbols reported by pefile, or an empty list
        when the module memory cannot be read, its headers cannot be
        parsed, or it has no export directory.
    """
    # Local import so this block does not rely on what the file's
    # star-import of pefile happens to expose.
    from pefile import PEFormatError

    try:
        module_data = self.read(module.baseaddress, module.size)
    except UcError:
        print(f"Failed to read module data for {module.name}")
        return []

    # This runs for every module in the dump; one module with damaged or
    # missing headers must not abort the whole setup.
    try:
        pe = PE(data=module_data, fast_load=True)
    except PEFormatError:
        print(f"Failed to parse PE headers for {module.name}")
        return []

    # Hack to adjust pefile to accept in-memory modules
    for section in pe.sections:
        # Potentially interesting members: Misc_PhysicalAddress, Misc_VirtualSize, SizeOfRawData
        section.PointerToRawData = section.VirtualAddress
        section.PointerToRawData_adj = section.VirtualAddress

    # Only the export directory is needed.
    pe.parse_data_directories(directories=[DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_EXPORT"]])
    export_directory = getattr(pe, "DIRECTORY_ENTRY_EXPORT", None)
    return export_directory.symbols if export_directory is not None else []
625+
626+
def _setup_syscalls(self):
627+
# Load the ntdll module from memory
628+
ntdll = self._find_module("ntdll.dll")
585629
syscalls = []
586-
for export in pe.DIRECTORY_ENTRY_EXPORT.symbols:
630+
for export in self._parse_module_exports(ntdll):
587631
if export.name and export.name.startswith(b"Zw"):
588632
syscalls.append((export.address, export.name.decode("utf-8")))
589633
elif export.name == b"Wow64Transition":
@@ -698,9 +742,50 @@ def _hook_mem(uc: Uc, access, address, size, value, dp: Dumpulator):
698742
print(f"unmapped fetch of {address:0x}[{size:0x}] = {value:0x}, cip = {dp.regs.cip:0x}")
699743
return False
700744

745+
def _get_regs(instr):
    """Collect the names of all registers an instruction touches.

    Covers explicit register operands, the base and index registers of
    memory operands (an id of 0 means "not present" and is skipped), and
    the registers listed in ``regs_read`` / ``regs_write``.

    Returns:
        A set of register name strings as produced by ``instr.reg_name``.
    """
    reg_ids = []
    for operand in instr.operands:
        if operand.type == CS_OP_REG:
            reg_ids.append(operand.value.reg)
        elif operand.type == CS_OP_MEM:
            mem = operand.value.mem
            reg_ids.extend(reg for reg in (mem.base, mem.index) if reg != 0)
    reg_ids.extend(instr.regs_read)
    reg_ids.extend(instr.regs_write)
    return {instr.reg_name(reg) for reg in reg_ids}
701760

702761
def _hook_code(uc: Uc, address, size, dp: Dumpulator):
    """Per-instruction hook that appends one line to the trace file.

    Line format::

        0x<address>[ <label>]|<mnemonic>[ <operands>][|<reg>=0x<value>...]

    ``<label>`` is the export name at this address if there is one,
    otherwise the module file name when execution has just entered a
    different module, otherwise nothing.

    Args:
        uc: Emulator instance (unused).
        address: Address of the instruction about to execute.
        size: Size of the instruction in bytes.
        dp: Owning Dumpulator; provides ``read``, ``cs``, ``exports``,
            ``last_module``, ``find_module_by_addr``, ``regs`` and ``trace``.

    Returns:
        Always True.
    """
    code = dp.read(address, size)
    # disasm() yields nothing when the bytes cannot be decoded; a bare
    # next() would raise StopIteration out of the hook in that case.
    instr = next(dp.cs.disasm(code, address, 1), None)

    label = dp.exports.get(address, "")

    # Only report the module name when execution moves into a different
    # module than the previously traced instruction.
    module = ""
    last = dp.last_module
    in_last_module = bool(last) and last.baseaddress <= address < last.baseaddress + last.size
    if not in_last_module:
        dp.last_module = dp.find_module_by_addr(address)
        if dp.last_module:
            module = dp.last_module.name.split("\\")[-1].lower()

    if label:
        label = " " + label
    elif module:
        label = " " + module

    if instr is None:
        # Still record the address and the raw bytes so the trace has no gap.
        dp.trace.write(f"0x{address:x}{label}|??? {code.hex()}\n")
        return True

    text = instr.mnemonic
    if instr.op_str:
        text += " " + instr.op_str
    fields = [f"0x{address:x}{label}", text]
    # Sorted so trace files are reproducible: set iteration order for
    # strings is not stable between interpreter runs.
    for reg in sorted(_get_regs(instr)):
        fields.append(f"{reg}=0x{getattr(dp.regs, reg):x}")
    dp.trace.write("|".join(fields) + "\n")
    return True
705790

706791

@@ -758,3 +843,7 @@ def _hook_syscall(uc: Uc, dp: Dumpulator):
758843
else:
759844
print(f"syscall index {index:0x} out of range")
760845
uc.emu_stop()
846+
847+
def _hook_invalid(uc: Uc, address, dp: Dumpulator = None):
    """Invalid-instruction hook: report the faulting address and do not handle it.

    Unicorn invokes ``UC_HOOK_INSN_INVALID`` callbacks with only
    ``(uc, user_data)``; no address is passed. A callback requiring three
    positional arguments therefore fails with ``TypeError`` when the hook
    fires. The three-argument form is still accepted for existing callers.

    Args:
        uc: Emulator instance (unused).
        address: Faulting address, or the Dumpulator instance when called
            by unicorn with two arguments.
        dp: Owning Dumpulator; omitted in the two-argument form.

    Returns:
        Always False.
    """
    if dp is None:
        # Two-argument call from unicorn: the second positional argument is
        # the user data, so read the address from the instruction pointer.
        dp = address
        address = dp.regs.cip
    print(f"invalid instruction at {address:0x}")
    return False

0 commit comments

Comments
 (0)