6
6
from pefile import *
7
7
import inspect
8
8
from .native import *
9
+ from capstone import *
9
10
10
11
syscall_functions = {}
11
12
@@ -299,6 +300,7 @@ def __init__(self, uc: Uc, x64):
299
300
"mxcsr" : UC_X86_REG_MXCSR ,
300
301
"fs_base" : UC_X86_REG_FS_BASE ,
301
302
"gs_base" : UC_X86_REG_GS_BASE ,
303
+ "rflags" : UC_X86_REG_EFLAGS ,
302
304
}
303
305
if self ._x64 :
304
306
self ._regmap .update ({
@@ -384,18 +386,32 @@ class Dumpulator:
384
386
def __init__(self, minidump_file, trace=False):
    """Load a minidump and prepare emulator, disassembler and lookup tables.

    Args:
        minidump_file: Path of the minidump. It is handed to
            ``MinidumpFile.parse`` and also used as the prefix of the
            trace file name.
        trace: When true, ``<minidump_file>.trace`` is opened for writing
            and kept in ``self.trace``; otherwise ``self.trace`` is None.
    """
    self._minidump = MinidumpFile.parse(minidump_file)
    # Treated as 64-bit unless the first thread carries a WOW64 context.
    self._x64 = type(self._minidump.threads.threads[0].ContextObject) is not WOW64_CONTEXT

    if trace:
        # Explicit encoding: trace lines embed export names decoded from the
        # dump, and the locale default (e.g. cp1252) may be unable to encode
        # them, which would raise UnicodeEncodeError in the middle of a run.
        # NOTE(review): nothing visible in this method closes the file.
        self.trace = open(minidump_file + ".trace", "w", encoding="utf-8")
    else:
        self.trace = None

    # Module containing the most recently traced instruction (None until known).
    self.last_module: Optional[MinidumpModule] = None

    # Emulator and disassembler are configured for the same bitness.
    uc_mode = UC_MODE_64 if self._x64 else UC_MODE_32
    self._uc = Uc(UC_ARCH_X86, uc_mode)

    cs_mode = CS_MODE_64 if self._x64 else CS_MODE_32
    self.cs = Cs(CS_ARCH_X86, cs_mode)
    # Detail mode makes capstone populate operand and implicit-register info.
    self.cs.detail = True

    self.regs = Registers(self._uc, self._x64)
    self.args = Arguments(self._uc, self.regs, self._x64)
    self._allocate_base = None
    self._allocate_size = 0x10000
    self._allocate_ptr = None
    self._setup_emulator()
    self.exit_code = None
    self.syscalls = []
    self._setup_syscalls()
    # Maps absolute export addresses to "module:name" labels.
    self.exports = self._setup_exports()
414
+
399
415
400
416
# Source: https://github.com/mandiant/speakeasy/blob/767edd2272510a5badbab89c5f35d43a94041378/speakeasy/windows/winemu.py#L533
401
417
def _setup_gdt (self , teb_addr ):
@@ -480,17 +496,18 @@ def _create_selector(index, flags):
480
496
selector = _create_selector (15 , GDT_FLAGS .Ring3 )
481
497
self .regs .gs = selector
482
498
483
- def _setup_emulator (self , trace ):
499
+ def _setup_emulator (self ):
484
500
# set up hooks
485
501
self ._uc .hook_add (UC_HOOK_MEM_READ_UNMAPPED | UC_HOOK_MEM_WRITE_UNMAPPED | UC_HOOK_MEM_FETCH_UNMAPPED | UC_HOOK_MEM_READ_PROT | UC_HOOK_MEM_WRITE_PROT | UC_HOOK_MEM_FETCH_PROT , _hook_mem , user_data = self )
486
502
#self._uc.hook_add(UC_HOOK_MEM_FETCH_UNMAPPED, _hook_mem, user_data=self)
487
- if trace :
503
+ if self . trace :
488
504
self ._uc .hook_add (UC_HOOK_CODE , _hook_code , user_data = self )
489
505
#self._uc.hook_add(UC_HOOK_MEM_READ_INVALID, self._hook_mem, user_data=None)
490
506
#self._uc.hook_add(UC_HOOK_MEM_WRITE_INVALID, self._hook_mem, user_data=None)
491
507
self ._uc .hook_add (UC_HOOK_INSN , _hook_syscall , user_data = self , arg1 = UC_X86_INS_SYSCALL )
492
508
self ._uc .hook_add (UC_HOOK_INSN , _hook_syscall , user_data = self , arg1 = UC_X86_INS_SYSENTER )
493
509
self ._uc .hook_add (UC_HOOK_INTR , _hook_interrupt , user_data = self )
510
+ self ._uc .hook_add (UC_HOOK_INSN_INVALID , _hook_invalid , user_data = self )
494
511
495
512
# map in codecave
496
513
self ._uc .mem_map (CAVE_ADDR , CAVE_SIZE )
@@ -562,6 +579,19 @@ def _setup_emulator(self, trace):
562
579
563
580
self ._setup_gdt (thread .Teb )
564
581
582
def _setup_exports(self):
    """Build a lookup table from absolute export address to a label.

    Every module listed in the minidump is announced on stdout and its
    exports (as returned by ``self._parse_module_exports``) are added to
    the table under ``module.baseaddress + export.address``.

    Returns:
        dict mapping an address to ``"<module file name>:<export>"``, where
        the module file name is lower-cased and ``<export>`` is the export's
        name, or ``#<ordinal>`` when the export has no name.
    """
    exports = {}
    for module in self._minidump.modules.modules:
        # Keep only the file name component of the Windows path.
        module_name = module.name.split("\\")[-1].lower()
        print(f"{module_name} 0x{module.baseaddress:x}[0x{module.size:x}]")
        base = module.baseaddress
        for export in self._parse_module_exports(module):
            if export.name:
                # Names come straight from dumped memory; a malformed byte
                # must not abort the whole table, so it is escaped instead.
                # backslashreplace also keeps the label ASCII-only.
                name = export.name.decode("utf-8", errors="backslashreplace")
            else:
                name = f"#{export.ordinal}"
            exports[base + export.address] = f"{module_name}:{name}"
    return exports
594
+
565
595
def _find_module (self , name ) -> MinidumpModule :
566
596
module : MinidumpModule
567
597
for module in self ._minidump .modules .modules :
@@ -570,20 +600,34 @@ def _find_module(self, name) -> MinidumpModule:
570
600
return module
571
601
raise Exception (f"Module '{ name } ' not found" )
572
602
573
- def _setup_syscalls (self ):
574
- # Load the ntdll module from memory
575
- ntdll = self ._find_module ("ntdll.dll" )
576
- ntdll_data = self .read (ntdll .baseaddress , ntdll .size )
577
- pe = PE (data = ntdll_data , fast_load = True )
603
def find_module_by_addr(self, address) -> Optional[MinidumpModule]:
    """Return the first minidump module whose range contains ``address``.

    A module covers the half-open range
    ``[baseaddress, baseaddress + size)``. Returns None when no module in
    the minidump's module list covers the address.
    """
    containing = (
        candidate
        for candidate in self._minidump.modules.modules
        if candidate.baseaddress <= address < candidate.baseaddress + candidate.size
    )
    return next(containing, None)
609
+
610
def _parse_module_exports(self, module):
    """Return the export symbols of a module as it sits in emulated memory.

    The module image is read through ``self.read`` and parsed with pefile.

    Args:
        module: Minidump module entry; ``name``, ``baseaddress`` and
            ``size`` are used.

    Returns:
        The ``symbols`` list of the parsed export directory, or an empty
        list when the module memory cannot be read or the parsed image has
        no export directory.
    """
    try:
        module_data = self.read(module.baseaddress, module.size)
    except UcError:
        # Say which module failed: this runs once per module in the dump,
        # so an anonymous message cannot be attributed to anything.
        print(f"Failed to read module data for {module.name} at 0x{module.baseaddress:x}[0x{module.size:x}]")
        return []
    pe = PE(data=module_data, fast_load=True)
    # Hack to adjust pefile to accept in-memory modules: point every
    # section's raw-data pointer at its virtual address.
    for section in pe.sections:
        # Potentially interesting members: Misc_PhysicalAddress, Misc_VirtualSize, SizeOfRawData
        section.PointerToRawData = section.VirtualAddress
        section.PointerToRawData_adj = section.VirtualAddress
    # fast_load skipped the data directories; parse only the export directory.
    pe.parse_data_directories(directories=[DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_EXPORT"]])
    return pe.DIRECTORY_ENTRY_EXPORT.symbols if hasattr(pe, "DIRECTORY_ENTRY_EXPORT") else []
625
+
626
+ def _setup_syscalls (self ):
627
+ # Load the ntdll module from memory
628
+ ntdll = self ._find_module ("ntdll.dll" )
585
629
syscalls = []
586
- for export in pe . DIRECTORY_ENTRY_EXPORT . symbols :
630
+ for export in self . _parse_module_exports ( ntdll ) :
587
631
if export .name and export .name .startswith (b"Zw" ):
588
632
syscalls .append ((export .address , export .name .decode ("utf-8" )))
589
633
elif export .name == b"Wow64Transition" :
@@ -698,9 +742,50 @@ def _hook_mem(uc: Uc, access, address, size, value, dp: Dumpulator):
698
742
print (f"unmapped fetch of { address :0x} [{ size :0x} ] = { value :0x} , cip = { dp .regs .cip :0x} " )
699
743
return False
700
744
745
def _get_regs(instr):
    """Return the set of register names an instruction refers to.

    Covers register operands, the base and index registers of memory
    operands, and the registers listed in ``instr.regs_read`` and
    ``instr.regs_write``. Names are produced by ``instr.reg_name``.
    """
    reg_ids = []
    for operand in instr.operands:
        if operand.type == CS_OP_REG:
            reg_ids.append(operand.value.reg)
        elif operand.type == CS_OP_MEM:
            mem = operand.value.mem
            # A base or index id of 0 is skipped.
            reg_ids.extend(reg_id for reg_id in (mem.base, mem.index) if reg_id != 0)
    reg_ids.extend(instr.regs_read)
    reg_ids.extend(instr.regs_write)
    return {instr.reg_name(reg_id) for reg_id in reg_ids}
701
760
702
761
def _hook_code(uc: Uc, address, size, dp: Dumpulator):
    """Code hook: append one line describing the instruction to ``dp.trace``.

    Line format: ``0x<address>[ <label>]|<mnemonic>[ <operands>]`` followed
    by ``|<reg>=0x<value>`` for every register returned by ``_get_regs``.
    ``<label>`` is the export name at the address if there is one; otherwise
    it is the module file name, emitted only when execution has moved into
    a different module than the previously traced instruction.

    Args:
        uc: Emulator instance passed by the hook machinery (unused here).
        address: Address of the instruction.
        size: Size of the instruction in bytes.
        dp: Owning Dumpulator; its ``trace``, ``cs``, ``regs``, ``exports``
            and ``last_module`` members are used.

    Returns:
        True.
    """
    code = dp.read(address, size)
    # None instead of StopIteration when capstone cannot decode the bytes.
    instr = next(dp.cs.disasm(code, address, 1), None)

    # The module is looked up again only after leaving the cached one.
    module = ""
    last = dp.last_module
    if not (last and last.baseaddress <= address < last.baseaddress + last.size):
        dp.last_module = dp.find_module_by_addr(address)
        if dp.last_module:
            module = dp.last_module.name.split("\\")[-1].lower()

    # An export label takes precedence over the bare module name.
    address_name = dp.exports.get(address, "") or module
    if address_name:
        address_name = " " + address_name

    if instr is None:
        # Keep the trace going and record the raw bytes that failed to decode.
        dp.trace.write(f"0x{address:x}{address_name}|??? {code.hex()}\n")
        return True

    parts = [f"0x{address:x}{address_name}|{instr.mnemonic}"]
    if instr.op_str:
        parts.append(" " + instr.op_str)
    # Sorted so register order is stable across runs; plain set iteration
    # order depends on string hashing and would make traces undiffable.
    parts.extend(f"|{reg}=0x{getattr(dp.regs, reg):x}" for reg in sorted(_get_regs(instr)))
    parts.append("\n")
    dp.trace.write("".join(parts))
    return True
705
790
706
791
@@ -758,3 +843,7 @@ def _hook_syscall(uc: Uc, dp: Dumpulator):
758
843
else :
759
844
print (f"syscall index { index :0x} out of range" )
760
845
uc .emu_stop ()
846
+
847
def _hook_invalid(uc: Uc, address, dp: Dumpulator):
    """Invalid-instruction hook: print the offending address.

    Always returns False.
    NOTE(review): presumably False tells unicorn the instruction was not
    handled, so emulation stops; confirm against the unicorn hook contract.
    """
    message = f"invalid instruction at {address:0x}"
    print(message)
    return False
0 commit comments