Source code for bindiff.bindiff

from __future__ import absolute_import
import logging
import shutil
import os
import subprocess
import tempfile
from pathlib import Path
from typing import Union, Optional

from binexport import ProgramBinExport, FunctionBinExport, BasicBlockBinExport, InstructionBinExport

from bindiff.types import BindiffNotFound
from bindiff.file import BindiffFile, FunctionMatch, BasicBlockMatch


# Location of the BinDiff executable. Stays None until _check_bin_names()
# finds one of BIN_NAMES on disk and stores its resolved absolute path here.
BINDIFF_BINARY = None
# Name of the environment variable that _check_environ() reads to get the
# directory in which to look for the executable.
BINDIFF_PATH_ENV = "BINDIFF_PATH"
# Candidate executable file names, tried in this order inside each directory.
BIN_NAMES = ["bindiff", "bindiff.exe", "differ"]


def _check_bin_names(path: Path) -> bool:
    """
    Look for a BinDiff executable inside the given directory.

    Every name of ``BIN_NAMES`` is tried in order. The first one that exists
    as a regular file is stored, as a resolved absolute path, in the module
    global ``BINDIFF_BINARY``.

    :param path: directory expected to contain the binary
    :return: True if a binary was found (``BINDIFF_BINARY`` is updated),
             False otherwise (``BINDIFF_BINARY`` is left untouched)
    """
    global BINDIFF_BINARY
    for name in BIN_NAMES:
        candidate = path / name
        # is_file() rather than exists(): a sub-directory that merely shares
        # its name with the binary must not be recorded as the executable.
        if candidate.is_file():
            BINDIFF_BINARY = candidate.resolve().absolute()
            return True
    return False


def _check_environ() -> bool:
    """
    Look for BinDiff in the directory named by the ``BINDIFF_PATH_ENV``
    environment variable.

    :return: False when the variable is not set, otherwise the result of
             searching that directory with ``_check_bin_names``
    """
    install_dir = os.environ.get(BINDIFF_PATH_ENV)
    if install_dir is None:
        return False
    return _check_bin_names(Path(install_dir))


def _check_default_path() -> bool:
    """
    Look for BinDiff in the hard-coded directory ``/opt/zynamics/BinDiff/bin``.

    :return: result of searching that directory with ``_check_bin_names``
    """
    default_dir = Path("/opt/zynamics/BinDiff/bin")
    return _check_bin_names(default_dir)


def _check_path() -> bool:
    """
    Search every directory listed in the ``PATH`` environment variable for a
    BinDiff binary, stopping at the first directory that contains one.

    :return: True if one of the directories contains a binary, False if none
             does or if ``PATH`` is not set
    """
    try:
        search_path = os.environ["PATH"]
    except KeyError:
        return False
    # any() stops at the first hit, just like an explicit loop with a return
    return any(_check_bin_names(Path(entry)) for entry in search_path.split(os.pathsep))


class BinDiff(BindiffFile):
    """
    BinDiff class.

    Associates a BinDiff result file with the two :py:class:`ProgramBinExport`
    objects that were diffed. The match tables used by the methods below
    (``primary_functions_match``, ``primary_basicblock_match``,
    ``primary_instruction_match`` and their ``secondary_*`` counterparts) are
    not defined in this class; they are expected to be provided by the
    :py:class:`BindiffFile` base class, which is initialised with the diff file.

    The class also exposes static helpers to run the BinDiff differ and to
    check that it is installed.
    """

    def __init__(
        self,
        primary: Union[ProgramBinExport, str],
        secondary: Union[ProgramBinExport, str],
        diff_file: str,
    ):
        """
        :param primary: first program diffed (object, or path given as a string)
        :param secondary: second program diffed (object, or path given as a string)
        :param diff_file: diffing file as generated by bindiff (differ more specifically)
        """
        super().__init__(diff_file)

        #: Primary BinExport object
        self.primary = ProgramBinExport(primary) if isinstance(primary, str) else primary
        #: Secondary BinExport object
        self.secondary = ProgramBinExport(secondary) if isinstance(secondary, str) else secondary

    def primary_unmatched_function(self) -> list[FunctionBinExport]:
        """
        Return a list of the unmatched functions in the primary program.

        :return: list of unmatched functions in primary
        """
        return [
            fun
            for fun_addr, fun in self.primary.items()
            if fun_addr not in self.primary_functions_match
        ]

    def secondary_unmatched_function(self) -> list[FunctionBinExport]:
        """
        Return a list of the unmatched functions in the secondary program.

        :return: list of unmatched functions in secondary
        """
        return [
            fun
            for fun_addr, fun in self.secondary.items()
            if fun_addr not in self.secondary_functions_match
        ]

    def iter_function_matches(
        self,
    ) -> list[tuple[FunctionBinExport, FunctionBinExport, FunctionMatch]]:
        """
        Return a list of all the matched functions. Each element of the list is a tuple
        containing the function in the primary program, the matched function in the
        secondary program and the FunctionMatch object describing the match.

        :return: list of tuple, each containing the primary function, the secondary
                 function and the FunctionMatch object
        """
        return [
            (self.primary[match.address1], self.secondary[match.address2], match)
            for match in self.primary_functions_match.values()
        ]

    def _unmatched_bbs(
        self, function: FunctionBinExport, map: dict[int, dict[int, BasicBlockMatch]]
    ) -> list[BasicBlockBinExport]:
        """
        Return the basic blocks of ``function`` that have no match within that function.

        :param function: function whose basic blocks are inspected
        :param map: basic block matches, indexed by basic block address then by
                    the address of the function the match belongs to
        :return: list of unmatched basic blocks
        """
        bbs = []
        for bb_addr, bb in function.items():
            per_function = map.get(bb_addr)
            # Unmatched when the block has no entry at all, or when it has only
            # been matched as part of another function.
            if not per_function or function.addr not in per_function:
                bbs.append(bb)
        return bbs

    def primary_unmatched_basic_block(
        self, function: FunctionBinExport
    ) -> list[BasicBlockBinExport]:
        """
        Return a list of the unmatched basic blocks in the provided function.
        The function must be part of the primary program.

        :param function: A function of the primary program
        :return: list of unmatched basic blocks
        """
        return self._unmatched_bbs(function, self.primary_basicblock_match)

    def secondary_unmatched_basic_block(
        self, function: FunctionBinExport
    ) -> list[BasicBlockBinExport]:
        """
        Return a list of the unmatched basic blocks in the provided function.
        The function must be part of the secondary program.

        :param function: A function of the secondary program
        :return: list of unmatched basic blocks
        """
        return self._unmatched_bbs(function, self.secondary_basicblock_match)

    def iter_basicblock_matches(
        self, function1: FunctionBinExport, function2: FunctionBinExport
    ) -> list[tuple[BasicBlockBinExport, BasicBlockBinExport, BasicBlockMatch]]:
        """
        Return a list of all the matched basic blocks between the two provided functions.
        Each element of the list is a tuple containing the basic blocks of the primary and
        secondary functions and the BasicBlockMatch object describing the match.
        The first function must be part of the primary program while the second function
        must be part of the secondary program.

        :param function1: A function of the primary program
        :param function2: A function of the secondary program
        :return: list of tuple, each containing the primary basic block, the secondary
                 basic block and the BasicBlockMatch object
        """
        items = []
        for bb_addr, bb in function1.items():
            if maps := self.primary_basicblock_match.get(bb_addr):
                # Only keep the match recorded for function1 itself
                if match := maps.get(function1.addr):
                    items.append((bb, function2[match.address2], match))
        return items

    def _unmatched_instrs(
        self, bb: BasicBlockBinExport, map: dict[int, dict[int, int]]
    ) -> list[InstructionBinExport]:
        """
        Return the instructions of ``bb`` whose address is absent from ``map``.

        :param bb: basic block whose instructions are inspected
        :param map: instruction matches indexed by instruction address
        :return: list of unmatched instructions
        """
        return [instr for addr, instr in bb.instructions.items() if addr not in map]

    def primary_unmatched_instruction(self, bb: BasicBlockBinExport) -> list[InstructionBinExport]:
        """
        Return a list of the unmatched instructions in the provided basic block.
        The basic block must be part of the primary program.

        :param bb: A basic block belonging to the primary program
        :return: list of unmatched instructions
        """
        return self._unmatched_instrs(bb, self.primary_instruction_match)

    def secondary_unmatched_instruction(
        self, bb: BasicBlockBinExport
    ) -> list[InstructionBinExport]:
        """
        Return a list of the unmatched instructions in the provided basic block.
        The basic block must be part of the secondary program.

        :param bb: A basic block belonging to the secondary program
        :return: list of unmatched instructions
        """
        return self._unmatched_instrs(bb, self.secondary_instruction_match)

    def iter_instruction_matches(
        self, block1: BasicBlockBinExport, block2: BasicBlockBinExport
    ) -> list[tuple[InstructionBinExport, InstructionBinExport]]:
        """
        Return a list of all the matched instructions between the two provided basic blocks.
        Each element of the list is a tuple containing the instructions of the primary and
        secondary basic blocks.
        The first basic block must belong to the primary program while the second one
        must be part of the secondary program.

        :param block1: A basic block belonging to the primary program
        :param block2: A basic block belonging to the secondary program
        :return: list of tuple, each containing the primary instruction and the
                 secondary instruction
        """
        insts = []
        for addr, instr in block1.instructions.items():
            if inst_map := self.primary_instruction_match.get(addr):
                addr2 = inst_map.get(block1.function.addr)
                # Explicit None test: an instruction matched to address 0 is a
                # valid match and must not be dropped by a truthiness check.
                if addr2 is not None:
                    insts.append((instr, block2.instructions[addr2]))
        return insts

    def get_match(
        self, function: FunctionBinExport
    ) -> tuple[FunctionBinExport, FunctionMatch] | None:
        """
        Get the function that matches the provided one.

        :param function: A function that belongs either to primary or secondary
        :return: A tuple with the matched function and the match object if there is a
                 match for the provided function, otherwise None
        """
        # The address alone is ambiguous (both programs may use it), so the
        # function object itself is compared to find which program it is from.
        if self.primary.get(function.addr) == function:
            if match := self.primary_functions_match.get(function.addr):
                return self.secondary[match.address2], match
        elif self.secondary.get(function.addr) == function:
            if match := self.secondary_functions_match.get(function.addr):
                return self.primary[match.address1], match
        return None

    def is_matched(self, function: FunctionBinExport) -> bool:
        """
        :param function: A function that belongs either to primary or secondary.
        :return: True if there is a match for the provided function, False otherwise
        """
        return self.get_match(function) is not None

    @staticmethod
    def raw_diffing(p1_path: Union[Path, str], p2_path: Union[Path, str], out_diff: str) -> bool:
        """
        Static method to diff two binexport files against each other and storing
        the diffing result in the given file.

        The differ writes into a temporary directory which is always removed
        before returning, whether the diffing succeeded or not.

        :param p1_path: primary file path
        :param p2_path: secondary file path
        :param out_diff: diffing output file
        :return: True if successful, False otherwise
        :raise BindiffNotFound: if the bindiff binary cannot be found
        """
        # Make sure the bindiff binary is okay before doing any diffing
        BinDiff.assert_installation_ok()

        f1 = Path(p1_path)
        f2 = Path(p2_path)
        tmp_dir = Path(tempfile.mkdtemp())
        try:
            cmd_line = [
                BINDIFF_BINARY.as_posix(),
                f"--primary={p1_path}",
                f"--secondary={p2_path}",
                f"--output_dir={tmp_dir.as_posix()}",
            ]
            logging.debug("run diffing: %s", " ".join(cmd_line))
            process = subprocess.run(cmd_line, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            if process.returncode != 0:
                logging.error(f"differ terminated with error code: {process.returncode}")
                if process.stderr:
                    logging.error("differ stderr: %s", process.stderr.decode(errors="replace"))
                return False

            # Now look for the generated file
            out_file = tmp_dir / f"{f1.stem}_vs_{f2.stem}.BinDiff"
            if not out_file.exists():
                # Unexpected file name: fall back to any .BinDiff file of the directory
                candidates = list(tmp_dir.iterdir())
                if len(candidates) > 1:
                    logging.warning("the output directory not meant to contain multiple files")
                out_file = next((f for f in candidates if f.suffix == ".BinDiff"), None)
                if out_file is None:
                    logging.error("diff file .BinDiff not found")
                    return False

            shutil.move(out_file, out_diff)
            return True
        finally:
            # Runs on every exit path so that failures do not leak the directory
            shutil.rmtree(tmp_dir, ignore_errors=True)

    @staticmethod
    def from_binary_files(
        p1_path: str, p2_path: str, diff_out: str, override: bool = False
    ) -> Optional["BinDiff"]:
        """
        Diff two executable files. Both are first exported with
        ``ProgramBinExport.from_binary_file`` and the two resulting exports are
        then diffed with BinDiff.

        :param p1_path: primary binary file to diff
        :param p2_path: secondary binary file to diff
        :param diff_out: output file for the diff
        :param override: override Binexports files and diffing
        :return: BinDiff object representing the diff, None if the export or
                 the diffing failed
        """
        p1 = ProgramBinExport.from_binary_file(p1_path, override=override)
        p2 = ProgramBinExport.from_binary_file(p2_path, override=override)
        if p1 and p2:
            return BinDiff.from_binexport_files(p1, p2, diff_out, override=override)
        logging.error("p1 or p2 could not have been 'binexported'")
        return None

    @staticmethod
    def from_binexport_files(
        p1_binexport: Union[ProgramBinExport, str],
        p2_binexport: Union[ProgramBinExport, str],
        diff_out: str,
        override: bool = False,
    ) -> Optional["BinDiff"]:
        """
        Diff two binexport files. Diff the two binexport files with bindiff
        and then load a BinDiff instance.

        The differ is only run when ``diff_out`` does not exist yet or when
        ``override`` is set; otherwise the existing file is loaded as is.

        :param p1_binexport: primary binexport file to diff (path or object)
        :param p2_binexport: secondary binexport file to diff (path or object)
        :param diff_out: output file for the diff
        :param override: override Binexports files and diffing
        :return: BinDiff object representing the diff, None if the diffing failed
        """
        p1_path = p1_binexport.path if isinstance(p1_binexport, ProgramBinExport) else p1_binexport
        p2_path = p2_binexport.path if isinstance(p2_binexport, ProgramBinExport) else p2_binexport

        if override or not Path(diff_out).exists():
            if not BinDiff.raw_diffing(p1_path, p2_path, diff_out):
                return None
        return BinDiff(p1_binexport, p2_binexport, diff_out)

    @staticmethod
    def _configure_bindiff_path() -> None:
        """
        Check BinDiff access paths.

        Lookup order: the ``BINDIFF_PATH_ENV`` environment variable, then the
        default installation directory, then ``PATH``. A warning is logged when
        none of them contains the binary.
        """
        if _check_environ() or _check_default_path() or _check_path():
            return
        logging.warning(
            f"Can't find a valid bindiff executable. (should be available in PATH or "
            f"as ${BINDIFF_PATH_ENV} env variable)"
        )

    @staticmethod
    def assert_installation_ok() -> None:
        """
        Assert BinDiff is installed.

        :raise BindiffNotFound: if the bindiff binary cannot be found
        """
        BinDiff._configure_bindiff_path()
        if BINDIFF_BINARY is None:
            raise BindiffNotFound()

    @staticmethod
    def is_installation_ok() -> bool:
        """
        Check that bindiff is properly installed and can be found
        on the system.

        :return: true if the bindiff binary can be found.
        """
        try:
            BinDiff.assert_installation_ok()
        except BindiffNotFound:
            return False
        return True