Source code for qbindiff.types

# Copyright 2023 Quarkslab
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Contains all the type alias/definitions used by the module

This module contains all definitions of the type aliases and the generic enums
used by qbindiff.
"""

from __future__ import annotations
from collections.abc import Iterable
from typing import Any, TypeAlias, Protocol, Literal, Sequence, TYPE_CHECKING

import numpy
from pathlib import Path
from scipy.sparse import csr_matrix, csr_array  # type: ignore[import-untyped]
from collections import namedtuple
import enum_tools.documentation
from enum import IntEnum

from qbindiff.abstract import GenericGraph, GenericNode

if TYPE_CHECKING:
    from collections.abc import Iterator
    from qbindiff import Program
    from qbindiff.features.extractor import FeatureCollector

FeatureValue: TypeAlias = float | dict[str, float]
"""
Type of a feature value.
"""

Positive: TypeAlias = float
"""
Float greater than zero
"""

Ratio: TypeAlias = float
"""
Float bewteen 0 and 1
"""

Idx: TypeAlias = int
"""
An integer representing an index in a matrix.
"""

Addr: TypeAlias = int
"""
An integer representing an address within a program
"""

NodeLabel: TypeAlias = Any  #: The node label of a generic graph

RawMapping: TypeAlias = tuple[list[Idx], list[Idx]]
"""
Pair of lists of indexes that are mapped together.
"""

Match = namedtuple("Match", "primary secondary similarity confidence squares")
"""
Match represent the matching between two functions and can hold the similarity between the two
"""

Dtype: TypeAlias = numpy.dtype
"""
Numpy data type
"""

# Numpy typing system is still very incomplete
ArrayLike1D: TypeAlias = Sequence[Any] | numpy.ndarray[Literal["N"], numpy.dtype[Any]]
"""
1 Dimensional arrays, either as sequences in python or as ndarrays in numpy.
"""

Vector: TypeAlias = numpy.ndarray
"""
Arbitrary d-Dimensional array. Used to represent a vector.
"""

Matrix: TypeAlias = numpy.ndarray
"""
Arbitrary nxd-Dimensional array. Used to represent a matrix.
"""

FeatureVectors: TypeAlias = numpy.ndarray
"""
Float nxd-Dimensional array. Each n rows is represented as a dimensionnal feature vector.
"""

AdjacencyMatrix: TypeAlias = numpy.ndarray
"""
Boolean nxn-Dimensional array. It's the adjacency matrix representation of the graph.
"""

SimMatrix: TypeAlias = numpy.ndarray
"""
Float nxm-Dimensional array. Records the pairwise similarity scores between nodes of both graphs to diff.
"""

SparseMatrix: TypeAlias = csr_matrix
"""
Float nxm-Dimensional array. A sparse version of the above SimMatrix
"""

Graph: TypeAlias = GenericGraph  #: generic Graph, iterable over the nodes

Node: TypeAlias = GenericNode  #: Generic node. This is the entity that will be matched

ExtendedMapping: TypeAlias = Iterable[tuple[Node, Node, float, float, int]]
"""
An extended version of RawMapping with two more lists recording pairing similarity and induced number of squares.
"""

SparseVector: TypeAlias = csr_array
"""
Float n-Dimensional sparse array.
"""

PathLike: TypeAlias = str | Path  #: Path


[docs] class GenericPrePass(Protocol): """Callback function type for Pre Passes""" def __call__( self, sim_matrix: SimMatrix, primary: Program, secondary: Program, primary_mapping: dict[Addr, Idx], secondary_mapping: dict[Addr, Idx], **kwargs, ) -> Iterator[int] | None: """ Execute the pass that operates on the similarity matrix inplace :param sim_matrix: The similarity matrix of between the primary and secondary, of type :py:class:`qbindiff.types:SimMatrix` :param primary: The primary binary of type :py:class:`qbindiff.loader.Program` :param secondary: The secondary binary of type :py:class:`qbindiff.loader.Program` :param primary_mapping: Mapping between the primary function addresses and their corresponding index :param secondary_mapping: Mapping between the secondary function addresses and their corresponding index :returns: Either an iterator in the range [0, 1000] (used for tracking progress) or None. """ raise NotImplementedError()
[docs] class GenericPostPass(Protocol): """Callback function type for Post Passes""" def __call__( self, sim_matrix: SimMatrix, primary: Program, secondary: Program, primary_mapping: dict[Addr, Idx], secondary_mapping: dict[Addr, Idx], primary_features: dict[Addr, FeatureCollector], secondary_features: dict[Addr, FeatureCollector], **kwargs, ) -> Iterator[int] | None: """ Execute the pass that operates on the similarity matrix inplace :param sim_matrix: The similarity matrix of between the primary and secondary, of type :py:class:`qbindiff.types:SimMatrix` :param primary: The primary binary of type :py:class:`qbindiff.loader.Program` :param secondary: The secondary binary of type :py:class:`qbindiff.loader.Program` :param primary_mapping: Mapping between the primary function addresses and their corresponding index :param secondary_mapping: Mapping between the secondary function addresses and their corresponding index :param primary_features: Mapping between function addresses and the associated FeatureCollector object for the primary program :param secondary_features: Mapping between function addresses and the associated FeatureCollector object for the secondary program :returns: Either an iterator in the range [0, 1000] (used for tracking progress) or None. """ raise NotImplementedError()
[docs] @enum_tools.documentation.document_enum class Distance(IntEnum): """ Enum of different (supported) distances used to compute the similarity matrix based on chosen features. """ canberra = 0 # doc: canberra distance euclidean = 1 # doc: euclidean distance cosine = 2 # doc: cosine distance haussmann = 3 # doc: haussmann distance