# Copyright 2023 Quarkslab
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Program
"""
from __future__ import annotations
from collections.abc import MutableMapping
from typing import TYPE_CHECKING
from pathlib import Path
from qbindiff.abstract import GenericGraph
from qbindiff.loader import Function
from qbindiff.loader.types import LoaderType
if TYPE_CHECKING:
import networkx
from networkx.classes.reportviews import OutEdgeView
from collections.abc import Callable, Iterator
from qbindiff.loader import Structure
from qbindiff.loader.backend.abstract import AbstractProgramBackend
from qbindiff.loader.types import ProgramCapability
from qbindiff.types import Addr
[docs]
class Program(MutableMapping, GenericGraph):
"""
Program class that shadows the underlying program backend used.
It is a :py:class:`MutableMapping`, where keys are function addresses and
values are :py:class:`Function` objects.
:param path: Path to the main file to load (depends on the underlying backend)
:param loader: The loader type. If not provided, the loader is inferred from the path
:param backend: Optional parameter to provide the object instance implementing the
AbstractProgramBackend interface
:param args: extra parameters passed to the Backend
:param kwargs: extra parameters forwarded to the backend constructor
The node label is the function address, the node itself is the :py:class:`Function` object
"""
def __init__(
self,
path: Path | str,
*args,
loader: LoaderType | None = None,
backend: AbstractProgramBackend | None = None,
**kwargs,
):
super().__init__()
path = Path(path)
# if a backend instance is directly provided use it
if loader is None and backend is not None:
self._backend = backend # Load directly from instanciated backend
else:
# Both loader and backend provided. Loader take precedence, warn the user.
if backend is not None:
logging.warning(
f"Both backend and loader provided for program {path}."
"The loader will take priority and the backend will be ignored."
)
# Try to infer it
if loader is None:
if path.suffix.casefold() == ".Quokka".casefold():
loader = LoaderType.quokka
elif path.suffix.casefold() == ".BinExport".casefold():
loader = LoaderType.binexport
# Match the resulting loader
if loader == LoaderType.ida:
from qbindiff.loader.backend.ida import ProgramBackendIDA
self._backend = ProgramBackendIDA(*args, **kwargs)
elif loader == LoaderType.binexport:
from qbindiff.loader.backend.binexport import ProgramBackendBinExport
self._backend = ProgramBackendBinExport(str(path), *args, **kwargs)
elif loader == LoaderType.quokka:
from qbindiff.loader.backend.quokka import ProgramBackendQuokka
self._backend = ProgramBackendQuokka(str(path), *args, **kwargs)
else:
raise NotImplementedError(f"Loader: {loader} not implemented")
self._filter = lambda x: True
self._functions: dict[Addr, Function] = {} # underlying dictionary containing the functions
self._load_functions()
[docs]
@staticmethod
def from_binexport(file_path: str, arch: str | None = None) -> Program:
"""
Load the Program using the binexport backend
:param file_path: File path to the binexport file
:param arch: Architecture to pass to the capstone disassembler. This is
useful when the binexport'ed architecture is not enough to
correctly disassemble the binary (for example with arm
thumb2 or some mips modes).
:return: Program instance
"""
return Program(file_path, arch=arch, loader=LoaderType.binexport)
[docs]
@staticmethod
def from_quokka(file_path: str, exec_path: str) -> Program:
"""
Load the Program using the Quokka backend.
:param file_path: File path to the binexport file
:param exec_path: Path of the raw binary
:return: Program instance
"""
return Program(file_path, exec_path=exec_path, loader=LoaderType.quokka)
[docs]
@staticmethod
def from_ida() -> Program:
"""
Load the program using the IDA backend
:return: Program instance
"""
return Program("", loader=LoaderType.ida)
[docs]
@staticmethod
def from_backend(backend: AbstractProgramBackend) -> Program:
"""
Load the Program from an instanciated program backend object
"""
return Program("", backend=backend)
def __repr__(self) -> str:
return "<Program:%s>" % self.name
def __iter__(self) -> Iterator[Function]:
"""
Iterate over all functions located in the program, using the filter registered.
:return: Iterator of all the functions
"""
yield from self._functions.values()
def __len__(self) -> int:
return len(self._functions)
def __getitem__(self, key):
return self._functions.__getitem__(key)
def __setitem__(self, key, value):
self._functions.__setitem__(key, value)
def __delitem__(self, key):
self._functions.__delitem__(key)
def _load_functions(self) -> None:
"""Load the functions from the backend"""
for function in map(Function.from_backend, self._backend.functions):
self[function.addr] = function
[docs]
def items(self) -> Iterator[tuple[Addr, Function]]: # type: ignore[override]
"""
Iterate over the items. Each item is {address: :py:class:`Function`}
:returns: A :py:class:`Iterator` over the functions. Each element
is a tuple (function_addr, function_obj)
"""
# yield function only if filter agree to keep it
yield from filter(lambda i: self._filter(i[0]), self._functions.items())
[docs]
def get_node(self, node_label: Addr) -> Function:
"""
Get the function identified by the address ``node_label``
:param node_label: the address of the function that will be returned
:returns: the function identified by its address
"""
return self[node_label]
@property
def node_labels(self) -> Iterator[Addr]:
"""
Iterate over the functions' address
:returns: An :py:class:`Iterator` over the functions' address
"""
yield from filter(self._filter, self._functions.keys())
@property
def nodes(self) -> Iterator[Function]:
"""
Iterate over the functions
:returns: An :py:class:`Iterator` over the functions
"""
yield from self.__iter__()
@property
def edges(self) -> OutEdgeView[tuple[Addr, Addr]]:
"""
Iterate over the edges. An edge is a pair (addr_a, addr_b)
:returns: An :py:class:`OutEdgeView` over the edges.
"""
return self.callgraph.edges
@property
def name(self) -> str:
"""
Returns the name of the program as defined by the backend
"""
return self._backend.name
@property
def structures(self) -> list[Structure]:
"""
Returns the list of structures defined in program
"""
return self._backend.structures
@property
def exec_path(self) -> str | None:
"""
The executable path if it has been specified, None otherwise
"""
return self._backend.exec_path
[docs]
def set_function_filter(self, func: Callable[[Addr], bool]) -> None:
"""
Filter out some functions, to ignore them in later processing.
.. warning: The filter only apply for __iter__, items functions and callgraph
property.
Accessing functions through the dictionary does not apply the filter
:param func: function take the function address (the node label) and returns
whether or not to keep it.
"""
self._filter = func
@property
def callgraph(self) -> networkx.DiGraph:
"""
The function callgraph with a Networkx DiGraph
"""
cg = self._backend.callgraph
funcs = list(self) # functions already filtered
return cg.subgraph([x.addr for x in funcs])
[docs]
def get_function(self, name: str) -> Function:
"""
Returns the function by its name
:param name: name of the function
:return: the function
"""
return self[self._backend.fun_names[name]]
[docs]
def follow_through(self, to_remove: Addr, target: Addr) -> None:
"""
Replace node `to_remove` with a follow-through edge from every parent of the
node with the node `target`.
Example : ``{ parents } -> (to_remove) -> (target)``
``--> { parents } -> (target)``
:param to_remove: node to remove
:param target: targe node
:return: None
"""
func = self[to_remove]
self.pop(to_remove)
for p_addr in list(func.parents):
# Remove edges
self[p_addr].children.remove(to_remove)
func.parents.remove(p_addr)
self._backend.callgraph.remove_edge(p_addr, to_remove)
# Add follow-through edge
self[p_addr].children.add(target)
self[target].parents.add(p_addr)
self._backend.callgraph.add_edge(p_addr, target)
for c_addr in list(func.children):
# Remove edges
func.children.remove(c_addr)
self[c_addr].parents.remove(to_remove)
self._backend.callgraph.remove_edge(to_remove, c_addr)
self._backend.callgraph.remove_node(to_remove)
[docs]
def remove_function(self, to_remove: Addr) -> None:
"""
Remove the node ``to_remove`` from the Call Graph of the program.
**WARNING**: The follow-through edges from the parents to the children are not
added. Example :
``{ parents } -> (to_remove) -> { children }``
``--> { parents } { children }``
:param to_remove: function_to_remove
:return: None
"""
func = self[to_remove]
self.pop(to_remove)
for p_addr in list(func.parents):
# Remove edges
self[p_addr].children.remove(to_remove)
func.parents.remove(p_addr)
self._backend.callgraph.remove_edge(p_addr, to_remove)
for c_addr in list(func.children):
# Remove edges
func.children.remove(c_addr)
self[c_addr].parents.remove(to_remove)
self._backend.callgraph.remove_edge(to_remove, c_addr)
self._backend.callgraph.remove_node(to_remove)
@property
def capabilities(self) -> ProgramCapability:
"""
Returns the underlying backend capabilities
"""
return self._backend.capabilities