Source code for volatility3.framework.symbols.linux.utilities.module_extract

# This file is Copyright 2025 Volatility Foundation and licensed under the Volatility Software License 1.0
# which is available at https://www.volatilityfoundation.org/license/vsl-v1.0
import logging
import struct

from typing import (
    List,
    Optional,
    Tuple,
    Dict,
)

from volatility3 import framework
from volatility3.framework import interfaces, exceptions, symbols, deprecation
from volatility3.framework.constants import linux as linux_constants
from volatility3.framework.symbols.linux import extensions

vollog = logging.getLogger(__name__)

# This module is responsible for producing an ELF file of a kernel module (LKM) loaded in memory
# This extraction task is quite complicated as the Linux kernel discards the ELF header at load time
# Due to this, to support static analysis, we must create an ELF header and proper file based on the sections
# There are also several other significant complications that we must deal with when trying to extract an LKM
# that can be analyzed with static analysis tools
# First, the .strtab points somewhere random and is kept off the module structure, not with the other sections
# Second, all of the symbols (.symtab) have mangled members that we must patch for anything to make sense
# Third, the section name string table (.shstrtab) is not an allocated section, meaning it is not in memory
# Not having the .shstrtab makes analysis impossible-to-difficult for static analysis tools. To work around this,
# we create the .shstrtab based on the sections in memory and then glue it in as the final section

# ModuleExtract.extract_module is the entry point and only visible method for plugins


# See PR #1773
@deprecation.renamed_class(
    deprecated_class_name="ModuleExtract",
    removal_date="2026-06-01",
    message="volatility3.framework.symbols.linux.utilities.module_extract.ModuleExtract is to be deprecated. Use volatility3.framework.symbols.linux.utilities.modules.ModuleExtract instead.",
)
class ModuleExtract(interfaces.configuration.VersionableInterface):
    """Extracts Linux kernel module structures into an analyzable ELF file

    The output file is laid out as: ELF header, the data of every recovered
    section, then the section header table. The section header table is:

        index 0        the mandatory NULL section header
        index 1..n     the recovered sections, ordered by load address,
                       with `.symtab` moved to the end
        index n+1      a synthesized `.shstrtab`

    Every section header index used in this class (symbol `st_shndx`,
    `sh_link` of `.symtab` and of relocation sections, `e_shstrndx`) refers to
    that table.
    """

    _version = (1, 0, 1)
    _required_framework_version = (2, 25, 0)

    framework.require_interface_version(*_required_framework_version)

    @classmethod
    def _find_section(
        cls, section_lookups: List[Tuple[str, int, int, int]], sym_address: int
    ) -> Optional[Tuple[str, int, int, int]]:
        """
        Finds the section containing `sym_address`

        Args:
            section_lookups: (name, header index, start address, size) tuples
            sym_address: the address to look up

        Returns:
            The first tuple whose [address, address + size) range contains
            `sym_address`, or None if no section contains it
        """
        for name, index, address, size in section_lookups:
            if address <= sym_address < address + size:
                return name, index, address, size

        return None

    @classmethod
    def _get_st_info_for_sym(
        cls,
        sym: interfaces.objects.ObjectInterface,
        sym_address: int,
        sect_name: Optional[str],
    ) -> bytes:
        """
        This is a helper function called from `_fix_sym_table`

        Calculates the `st_info` value for the given symbol

        Spec: https://refspecs.linuxbase.org/elf/gabi4+/ch4.symtab.html

        Args:
            sym: the in-memory symbol structure
            sym_address: the symbol's (mangled) `st_value`
            sect_name: name of the section containing the symbol, or None
                       when the symbol does not point into the module

        Returns:
            The single packed `st_info` byte
        """
        if sym.st_name > 0:
            # Global symbol
            bind = linux_constants.STB_GLOBAL

            if sym_address == 0:
                sect_type = linux_constants.STT_NOTYPE
            elif sect_name:
                # rela = relocations
                if ".text" in sect_name and ".rela" not in sect_name:
                    sect_type = linux_constants.STT_FUNC
                else:
                    sect_type = linux_constants.STT_OBJECT
            else:
                # outside the module being extracted
                sect_type = linux_constants.STT_NOTYPE
        else:
            # Local symbol
            bind = linux_constants.STB_LOCAL
            sect_type = linux_constants.STT_SECTION

        # Build the st_info as ELF32_ST_INFO/ELF64_ST_INFO
        bind_bits = (bind << 4) & 0xF0
        type_bits = sect_type & 0xF
        st_info_int = (bind_bits | type_bits) & 0xFF

        return struct.pack("B", st_info_int)

    @classmethod
    def _get_fixed_sym_fields(
        cls,
        st_fmt: str,
        sym: interfaces.objects.ObjectInterface,
        sections: List[Tuple[str, int, int, int]],
    ) -> Tuple[Optional[str], bytes, bytes, bytes]:
        """
        This is a helper function called from `_fix_sym_table`

        The st_value, st_info, and st_shndx fields of each symbol are changed/mangled while loading
        Static analysis tools do not understand these transformed values as they only make sense to the kernel loader
        We must de-mangle these to have analysis tools understand symbols (a key aspect)

        Args:
            st_fmt: struct format of the address-sized symbol fields ("<I" or "<Q")
            sym: the in-memory symbol structure
            sections: (name, header index, start address, size) tuples

        Returns:
            (section name or None, packed st_value, packed st_info, packed st_shndx)
        """
        # Start by trying to map a symbol to its section
        sym_address = sym.st_value
        sect_info = cls._find_section(sections, sym_address)

        if sect_info:
            # relative address inside the section
            sect_name, sect_index, sect_address, _ = sect_info
            st_value_int = sym_address - sect_address
        else:
            # Symbol does not point into the module being extracted
            sect_name, sect_index = None, None
            st_value_int = sym_address

        # Get the fixed st_value, st_info, and st_shndx that are broken in the mapped file,
        # formatted to be written into the extracted file
        st_value = struct.pack(st_fmt, st_value_int)
        st_info = cls._get_st_info_for_sym(sym, sym_address, sect_name)

        # reference the containing section if there is one, otherwise keep the in-memory value
        if sect_name:
            st_shndx = struct.pack("<H", sect_index)
        else:
            st_shndx = struct.pack("<H", sym.st_shndx)

        return sect_name, st_value, st_info, st_shndx

    @classmethod
    def _fix_sym_table(
        cls,
        context: interfaces.context.ContextInterface,
        vmlinux_name: str,
        original_sections: Dict[int, str],
        section_sizes: Dict[int, int],
        sym_type_name: str,
        st_fmt: str,
        module: extensions.module,
    ) -> Optional[bytes]:
        """
        This function implements the most painful part of the reconstruction

        The symbols in .symtab are broken/mangled during loading. We need to normalize these
        for static analysis tools to understand the references.

        Without proper symbols, analysis is pretty pointless and gets nowhere.

        Spec: https://refspecs.linuxbase.org/elf/gabi4+/ch4.symtab.html

        Args:
            context: the context holding the kernel module
            vmlinux_name: name of the kernel module in `context.modules`
            original_sections: section start address -> section name
            section_sizes: section start address -> recovered size. Its
                           insertion order must be the order in which the
                           section headers are emitted, as header indices are
                           derived from it.
            sym_type_name: "Elf32_Sym" or "Elf64_Sym"
            st_fmt: struct format of the address-sized symbol fields ("<I" or "<Q")
            module: the kernel module being extracted

        Returns:
            The rebuilt symbol table, or None if it is empty or could not be built
        """
        kernel = context.modules[vmlinux_name]

        # Gather the section information into a list.
        # Header indices start at 1 to account for the leading NULL section, and follow the
        # order of `section_sizes` so that st_shndx matches the header table that is written out.
        section_lookups: List[Tuple[str, int, int, int]] = []
        for header_index, (address, size) in enumerate(section_sizes.items(), start=1):
            name = original_sections[address]

            # We are fixing symtab references...
            if name == ".symtab":
                continue

            section_lookups.append((name, header_index, address, size))

        # Build the array of symbols as they are in memory
        sym_type = kernel.get_type(sym_type_name)
        symbols = kernel.object(
            object_type="array",
            subtype=sym_type,
            offset=module.section_symtab,
            count=module.num_symtab,
            absolute=True,
        )

        # holds the new (fixed) symbol table entries, joined once at the end
        fixed_symbols: List[bytes] = []

        # build a correct/normalized Elf32_Sym or Elf64_Sym for each symbol
        for sym in symbols:
            # get the mangled fields' correct values
            sect_name, st_value, st_info, st_shndx = cls._get_fixed_sym_fields(
                st_fmt, sym, section_lookups
            )

            # these aren't mangled during loading
            st_name = struct.pack("<I", sym.st_name)
            st_other = struct.pack("B", sym.st_other)
            st_size = struct.pack(st_fmt, sym.st_size)

            # The order as in the ELF specification. The order is not the same between 32 and 64 bit symbols
            if st_fmt == "<I":
                sym_data = st_name + st_value + st_size + st_info + st_other + st_shndx
            else:
                sym_data = st_name + st_info + st_other + st_shndx + st_value + st_size

            # This should never happen regardless of smear or other issues in the data. We build the structure to spec.
            if len(sym_data) != sym_type.size:
                vollog.error(
                    f"Size of sym_data is {len(sym_data)} expected {sym_type.size} for symbol at value {sym.st_value} in section {sect_name}"
                )
                return None

            fixed_symbols.append(sym_data)

        # an empty table is reported as a failure
        return b"".join(fixed_symbols) or None

    @classmethod
    def _parse_sections(
        cls,
        context: interfaces.context.ContextInterface,
        vmlinux_name: str,
        module: extensions.module,
    ) -> Optional[Tuple[List[Tuple[str, int, int, bytes]], Optional[int], int]]:
        """
        This function first parses the sections as maintained by the kernel
        It then orders the sections by load address, and then gathers the data of each section
        We also track the file_offset to correctly have alignment in the output file

        .symtab requires special handling as its so broken in memory as described in `_fix_sym_table`

        The data of .strtab is read directly off the module structure and not its section
        as the section from the original module has no meaning after loading as the kernel does not reference it.

        Returns:
            None if the module has no sections, no `.symtab`, or its symbol
            table could not be rebuilt. Otherwise a tuple of:
              - the ordered (name, address, file offset, data) sections, `.symtab` last
              - the section header index of `.strtab` (None if there is no `.strtab`)
              - the section header index of `.symtab`
        """
        original_sections: Dict[int, str] = {}
        for section in module.get_sections():
            original_sections[section.address] = section.get_name()

        if not original_sections:
            return None

        kernel = context.modules[vmlinux_name]
        kernel_layer = context.layers[kernel.layer_name]

        if symbols.symbol_table_is_64bit(context, kernel.symbol_table_name):
            sym_type = "Elf64_Sym"
            elf_hdr_type = "Elf64_Ehdr"
            st_fmt = "<Q"
        else:
            sym_type = "Elf32_Sym"
            elf_hdr_type = "Elf32_Ehdr"
            st_fmt = "<I"

        # At this point, we have the sections starting addresses and names,
        # but the kernel does not track the size
        # To recover the size, we sort by address and then use the next section as the boundary to calculate size
        # .symtab (the symbol table) and .strtab (the strings table) require special handling.
        # All others can be read with padding

        # get the addresses in sorted order, can index into `original_sections` for names
        sorted_addresses = sorted(original_sections)

        # We need to track where .symtab is for symbol name offsets
        symtab_address = None
        strtab_index = None

        # Section data starts after the file header
        file_offset = kernel.get_type(elf_hdr_type).vol.size

        # The ordered set of sections along with their fixed data
        updated_sections: List[Tuple[str, int, int, bytes]] = []

        # A mapping of section start addresses to sizes
        # original_sections does not have this information for reasons explained above
        # NOTE: filled in the same order as `updated_sections`; `_fix_sym_table` relies on that
        section_sizes: Dict[int, int] = {}

        for index, address in enumerate(sorted_addresses):
            sect_name = original_sections[address]

            # The symbol table in memory is completely transformed and broken from how it appears on disk
            # We need to process it last to fix the symbol table entries back to their correct values
            if sect_name == ".symtab":
                symtab_address = address
                continue

            # Read out the string table. The full size is not kept, so we give each symbol's string up to 256 bytes
            if sect_name == ".strtab":
                data = kernel_layer.read(
                    module.section_strtab, module.num_symtab * 256, pad=True
                )

                # The string table should end with two NULLs, but the kernel does not enforce this
                end_index = data.find(b"\x00\x00")
                if end_index != -1:
                    data = data[: end_index + 1]

                # Header index of the entry appended below (+1 for the leading NULL header)
                strtab_index = len(updated_sections) + 1
            else:
                # Compute based on the boundary of the next address-sorted section
                if index + 1 < len(sorted_addresses):
                    size = sorted_addresses[index + 1] - address
                else:
                    # We are at the last section so we need to pick a size
                    size = 0x10000
                    vollog.debug(f"Defaulting section {sect_name} to size {size:#x}")

                # Read the section normally..
                data = kernel_layer.read(address, size, pad=True)

            # store the section information in order
            updated_sections.append((sect_name, address, file_offset, data))

            # Track sizes of each section
            section_sizes[address] = len(data)

            file_offset += len(data)

        if not symtab_address:
            vollog.debug(
                f"Did not find a .symtab section for module at {module.vol.offset:#x}. Cannot recover."
            )
            # A bare None (not a tuple of Nones) so the caller's `is None` check catches it
            return None

        # Perform the painful demangling of symbol table structures
        data = cls._fix_sym_table(
            context,
            vmlinux_name,
            original_sections,
            section_sizes,
            sym_type,
            st_fmt,
            module,
        )
        if not data:
            vollog.debug(
                f"Could not construct a symbol table for module at {module.vol.offset}. Cannot recover."
            )
            return None

        # Header index of the entry appended below (+1 for the leading NULL header)
        symtab_index = len(updated_sections) + 1

        # Manually add symtab with the correct data
        updated_sections.append((".symtab", symtab_address, file_offset, data))

        return updated_sections, strtab_index, symtab_index

    @classmethod
    def _make_elf_header(
        cls, bits: int, sect_hdr_offset: int, num_sections: int
    ) -> Optional[bytes]:
        """
        Creates a `bits` bit ELF header for the file based on recovered values
        Called last as it needs information computed from the sections

        Spec: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html

        Args:
            bits: 32 selects the 32-bit layout, anything else the 64-bit layout
            sect_hdr_offset: file offset of the section header table (e_shoff)
            num_sections: number of section headers excluding the leading NULL
                          header; also used as the index of the last header
                          (e_shstrndx)

        Returns:
            The packed header, or None if its size does not match the layout
        """
        if bits == 32:
            fmt = "<I"
            e_ident = (
                b"\x7f\x45\x4c\x46\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00"
            )
            e_machine_int = 3  # EM_386
            e_ehsize_int = 52
            e_shentsize_int = 40
            header_size = 52
        else:
            fmt = "<Q"
            e_ident = (
                b"\x7f\x45\x4c\x46\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00"
            )
            e_machine_int = 0x3E  # EM_X86_64
            e_ehsize_int = 64
            e_shentsize_int = 64
            header_size = 64

        address_width = bits // 8

        e_type = struct.pack("<H", 1)  # relocatable
        e_machine = struct.pack("<H", e_machine_int)
        e_version = struct.pack("<I", 1)
        e_entry = b"\x00" * address_width  # The .init sections are freed after module load
        e_phoff = b"\x00" * address_width  # No program headers
        e_shoff = struct.pack(fmt, sect_hdr_offset)
        e_flags = b"\x00\x00\x00\x00"
        e_ehsize = struct.pack("<H", e_ehsize_int)
        e_phentsize = b"\x00\x00"
        e_phnum = b"\x00\x00"
        e_shentsize = struct.pack("<H", e_shentsize_int)
        e_shnum = struct.pack("<H", num_sections + 1)
        e_shstrndx = struct.pack("<H", num_sections)

        header = (
            e_ident
            + e_type
            + e_machine
            + e_version
            + e_entry
            + e_phoff
            + e_shoff
            + e_flags
            + e_ehsize
            + e_phentsize
            + e_phnum
            + e_shentsize
            + e_shnum
            + e_shstrndx
        )

        # should never happen as we make the header ourselves
        if len(header) != header_size:
            vollog.error(
                f"Making Elf header for arch {bits} created a header of {len(header)} bytes. Cannot proceed"
            )
            return None

        return header

    @classmethod
    def _calc_sect_type(cls, section_name: str) -> Optional[int]:
        """
        This function makes a best effort to map common section names to their attributes

        Names containing ".rela." map to SHT_RELA, known names map to their
        listed type, and everything else defaults to SHT_PROGBITS
        """
        known_sections = {
            ".note.gnu.build-id": linux_constants.SHT_NOTE,
            ".text": linux_constants.SHT_PROGBITS,
            ".init.text": linux_constants.SHT_PROGBITS,
            ".exit.text": linux_constants.SHT_PROGBITS,
            ".static_call.text": linux_constants.SHT_PROGBITS,
            ".rodata": linux_constants.SHT_PROGBITS,
            ".modinfo": linux_constants.SHT_PROGBITS,
            "__param": linux_constants.SHT_PROGBITS,
            ".data": linux_constants.SHT_PROGBITS,
            ".gnu.linkonce.this_module": linux_constants.SHT_PROGBITS,
            ".comment": linux_constants.SHT_PROGBITS,
            ".shstrtab": linux_constants.SHT_STRTAB,
            ".symtab": linux_constants.SHT_SYMTAB,
            ".strtab": linux_constants.SHT_STRTAB,
        }

        if ".rela." in section_name:
            return linux_constants.SHT_RELA

        return known_sections.get(section_name, linux_constants.SHT_PROGBITS)

    # all sections from memory are allocated (SHF_ALLOC)
    # special check certain other sections to try and ensure extra flags are added where needed
    @classmethod
    def _calc_sect_flags(cls, name: str) -> int:
        """
        Make a best effort to map common section names to their permissions
        If we miss a section here, users of common static analysis tools can mark the sections are writable or executable
        manually, but that becomes very cumbersome and breaks initial analysis by the tool
        """
        # All sections in memory are allocated (`A` in readelf -S)
        flags = linux_constants.SHF_ALLOC

        if name in [".text", ".init.text", ".exit.text", ".static_call.text"]:
            flags = flags | linux_constants.SHF_EXECINSTR

        elif name in [
            ".data",
            ".init.data",
            ".exit.data",
            ".bss",
            "__tracepoints",
            ".data.once",
            "_ftrace_events",
            ".gnu.linkonce.this_module",
        ]:
            flags = flags | linux_constants.SHF_WRITE

        return flags

    @classmethod
    def _calc_link(
        cls, name: str, strtab_index: int, symtab_index: int, sect_type: int
    ) -> int:
        """
        Calculates the link value for a section
        The most important ones are symtab indexes for relocations and to point the symbol table to the string tab

        Spec: https://refspecs.linuxbase.org/elf/gabi4+/ch4.sheader.html
        """
        # looking for RELA sections
        if ".rela." in name:
            return symtab_index

        # per spec: "The section header index of the associated string table."
        elif sect_type == linux_constants.SHT_SYMTAB:
            return strtab_index

        return 0

    @classmethod
    def _calc_entsize(cls, name: str, sect_type: int, bits: int) -> int:
        """
        Calculates the entsize for relocation sections and the symbol table section

        Spec: https://refspecs.linuxbase.org/elf/gabi4+/ch4.sheader.html

        Returns:
            The size of one table entry for RELA and symbol table sections,
            0 for every other section
        """
        is_32bit = bits == 32

        # RELA sections hold Elf32_Rela (3 x 4 bytes) or Elf64_Rela (3 x 8 bytes) entries
        if ".rela." in name:
            return 12 if is_32bit else 24

        # symbol tables hold Elf32_Sym (16 bytes) or Elf64_Sym (24 bytes) entries
        elif sect_type == linux_constants.SHT_SYMTAB:
            return 16 if is_32bit else 24

        return 0

    @classmethod
    def _make_section_header(
        cls,
        bits: int,
        name_index: int,
        name: str,
        address: int,
        size: int,
        file_offset: int,
        strtab_index: int,
        symtab_index: int,
    ) -> Optional[bytes]:
        """
        Creates a section header (Elf32_Shdr or Elf64_Shdr) for the given section

        Args:
            bits: 32 selects Elf32_Shdr, anything else Elf64_Shdr
            name_index: offset of the section's name inside `.shstrtab`
            name: the section name, used to derive type, flags, link and entsize
            address: load address of the section (sh_addr)
            size: size of the section data (sh_size)
            file_offset: offset of the section data in the output file (sh_offset)
            strtab_index: section header index of `.strtab`
            symtab_index: section header index of `.symtab`

        Returns:
            The packed header, or None if a value could not be packed or the
            result has an unexpected size
        """
        if bits == 32:
            fmt = "<I"
            sect_size = 40
        else:
            fmt = "<Q"
            sect_size = 64

        sect_header_type_int = cls._calc_sect_type(name)
        flags = cls._calc_sect_flags(name)
        link = cls._calc_link(name, strtab_index, symtab_index, sect_header_type_int)
        entsize = cls._calc_entsize(name, sect_header_type_int, bits)

        try:
            sh_name = struct.pack("<I", name_index)
            sh_type = struct.pack("<I", sect_header_type_int)
            sh_flags = struct.pack(fmt, flags)
            sh_addr = struct.pack(fmt, address)
            sh_offset = struct.pack(fmt, file_offset)
            sh_size = struct.pack(fmt, size)
            sh_link = struct.pack("<I", link)
            sh_info = b"\x00" * 4
            sh_addralign = struct.pack(fmt, 1)
            sh_entsize = struct.pack(fmt, entsize)
        # catch overflows of offset/address/size
        except struct.error:
            vollog.debug(
                f"Unable to build section header for section {name} at address {address:#x}"
            )
            return None

        # Field order differs between the 32 and 64 bit layouts only in width, not in order
        data = (
            sh_name
            + sh_type
            + sh_flags
            + sh_addr
            + sh_offset
            + sh_size
            + sh_link
            + sh_info
            + sh_addralign
            + sh_entsize
        )

        # This should never happen regardless of smear or other issues in the data. We build the structure to spec.
        if len(data) != sect_size:
            vollog.error(
                f"Size of section data is {len(data)} expected {sect_size} for section {name} at address {address:#x}"
            )
            return None

        return data

    @classmethod
    def extract_module(
        cls,
        context: interfaces.context.ContextInterface,
        vmlinux_name: str,
        module: extensions.module,
    ) -> Optional[bytes]:
        """
        Rebuilds an ELF file for `module` from its sections in memory

        Args:
            context: the context holding the kernel module
            vmlinux_name: name of the kernel module in `context.modules`
            module: the kernel module to extract

        Returns:
            The bytes of the reconstructed ELF file (ELF header, section data,
            section headers), or None if the module could not be rebuilt
        """
        # Bail early if bad address sent in
        try:
            hasattr(module.sect_attrs, "nsections")
        except exceptions.InvalidAddressException:
            vollog.debug(f"module at offset {module.vol.offset:#x} is paged out.")
            return None

        # Gather sections
        parse_sections_result = cls._parse_sections(context, vmlinux_name, module)
        if parse_sections_result is None:
            return None
        updated_sections, strtab_index, symtab_index = parse_sections_result

        kernel = context.modules[vmlinux_name]

        # Figure out header sizes
        if symbols.symbol_table_is_64bit(context, kernel.symbol_table_name):
            header_type = "Elf64_Ehdr"
            section_type = "Elf64_Shdr"
            bits = 64
        else:
            header_type = "Elf32_Ehdr"
            section_type = "Elf32_Shdr"
            bits = 32

        header_type_size = kernel.get_type(header_type).size
        section_type_size = kernel.get_type(section_type).size

        # Per Linux-spec, all LKMs must start with a null section header
        # This list holds the headers as they are built
        header_parts: List[bytes] = [b"\x00" * section_type_size]

        # Holder of the data of the sections
        data_parts: List[bytes] = []

        # the .shstrtab section is "\x00" + section name for each section
        # followed by a terminating null.
        # It starts with the null string (\x00)
        shstrtab_parts: List[bytes] = [b"\x00"]

        # Track where we end the sections and data to glue `.shstrtab` after
        last_file_offset = None
        last_sect_size = None

        # Start at 1 in the string table
        name_index = 1

        # Create the actual section headers
        for name, address, file_offset, section_data in updated_sections:
            # Make the section header
            header_bytes = cls._make_section_header(
                bits,
                name_index,
                name,
                address,
                len(section_data),
                file_offset,
                strtab_index,
                symtab_index,
            )
            if not header_bytes:
                vollog.debug(f"make_section_header failed for section {name}")
                return None

            encoded_name = bytes(name, encoding="utf8") + b"\x00"

            # Index into the string table; advance by what is actually stored (encoded bytes + NUL)
            name_index += len(encoded_name)

            # collect the header and section bytes
            header_parts.append(header_bytes)
            data_parts.append(section_data)

            # track where we are so .shstrtab goes into correct offset
            last_file_offset = file_offset
            last_sect_size = len(section_data)

            # append each section name to what will become .shstrtab
            shstrtab_parts.append(encoded_name)

        if last_file_offset is None or last_sect_size is None:
            vollog.debug(
                f"No sections were recovered for module at {module.vol.offset:#x}"
            )
            return None

        # stick our own section reference string at end
        # name_index points to the end of the last section string after the loop ends
        shstrtab_parts.append(b".shstrtab\x00")
        shstrtab_data = b"".join(shstrtab_parts)

        # create our .shstrtab section so sections have names
        shstrtab_header = cls._make_section_header(
            bits,
            name_index,
            ".shstrtab",
            0,
            len(shstrtab_data),
            last_file_offset + last_sect_size,
            strtab_index,
            symtab_index,
        )
        if not shstrtab_header:
            vollog.debug("make_section_header failed for section .shstrtab")
            return None

        header_parts.append(shstrtab_header)
        data_parts.append(shstrtab_data)

        sections_headers = b"".join(header_parts)
        sections_data = b"".join(data_parts)

        num_sections = len(updated_sections) + 1

        header = cls._make_elf_header(
            bits,
            header_type_size + len(sections_data),
            num_sections,
        )

        if not header:
            vollog.error(
                f"Hit error creating Elf header for module at {module.vol.offset:#x}"
            )
            return None

        # Return our beautiful, hand-crafted, farm raised ELF file
        return header + sections_data + sections_headers