# Copyright 2023 Quarkslab
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Simple mapping interface
"""
from __future__ import annotations
import csv, logging
from pathlib import Path
from typing import TYPE_CHECKING
from qbindiff.types import Match
if TYPE_CHECKING:
from typing import Callable, Any, TypeAlias
from qbindiff.types import ExtendedMapping, Node, PathLike
ExtraAttrsType: TypeAlias = str | tuple[str, Callable[[Node], Any]]
[docs]
class Mapping:
"""
This class represents an interface to access the result of the matching analysis.
Its interface is independent of the underlying :py:obj:`Node` type manipulated.
"""
def __init__(
self, mapping: ExtendedMapping, unmatched_primary: set[Node], unmatched_secondary: set[Node]
):
self._matches = [Match(*x) for x in mapping]
self._primary_unmatched = unmatched_primary
self._secondary_unmatched = unmatched_secondary
def __iter__(self):
return iter(self._matches)
@property
def similarity(self) -> float:
"""
Sum of similarities of the diff (unbounded value)
"""
return sum(x.similarity for x in self._matches)
@property
def normalized_similarity(self) -> float:
"""
Normalized similarity of the diff (from 0 to 1)
"""
return (2 * self.similarity) / (self.nb_nodes_primary + self.nb_nodes_secondary)
@property
def squares(self) -> float:
"""
Number of matching squares
"""
return sum(x.squares for x in self._matches) / 2
[docs]
def add_match(
self,
node1: Node,
node2: Node,
similarity: float,
confidence: float,
squares: int,
) -> None:
"""
Add the given match between the two nodes.
:param node1: node in primary
:param node2: node in secondary
:param similarity: similarity metric as float
:param confidence: confidence in the result (0..1)
:param squares: Number of squares being made
"""
self._matches.append(Match(node1, node2, similarity, confidence, squares))
[docs]
def remove_match(self, match: Match) -> None:
"""
Remove the given matching from the matching.
:param match: Match object to remove from the matching
:return: None
"""
self._matches.remove(match)
@property
def primary_matched(self) -> set[Node]:
"""
Set of nodes matched in primary
"""
return {x.primary for x in self._matches}
@property
def primary_unmatched(self) -> set[Node]:
"""
Set of nodes unmatched in primary.
"""
return self._primary_unmatched
@property
def secondary_matched(self) -> set[Node]:
"""
Set of nodes matched in the secondary object.
"""
return {x.secondary for x in self._matches}
@property
def secondary_unmatched(self) -> set[Node]:
"""
Set of nodes unmatched in the secondary object.
"""
return self._secondary_unmatched
@property
def nb_match(self) -> int:
"""
Number of matches
"""
return len(self._matches)
@property
def nb_unmatched_primary(self) -> int:
"""
Number of unmatched nodes in primary.
"""
return len(self._primary_unmatched)
@property
def nb_unmatched_secondary(self) -> int:
"""
Number of unmatched nodes in secondary.
"""
return len(self._secondary_unmatched)
@property
def nb_nodes_primary(self) -> int:
"""
Total number of nodes in primary
"""
return self.nb_match + self.nb_unmatched_primary
@property
def nb_nodes_secondary(self) -> int:
"""
Total number of nodes in secondary.
"""
return self.nb_match + self.nb_unmatched_secondary
[docs]
def match_primary(self, node: Node) -> Match | None:
"""
Returns the match associated with the given primary node (if any).
:param node: node to match in primary
:return: optional match
"""
for m in self._matches:
if m.primary == node:
return m
return None
[docs]
def match_secondary(self, node: Node) -> Match | None:
"""
Returns the match associated with the given secondary node (if any).
:param node: node to match in secondary
:return: optional match
"""
for m in self._matches:
if m.secondary == node:
return m
return None
[docs]
def is_match_primary(self, node: Node) -> bool:
"""
Returns true if the node in primary has been matched with a node in secondary.
:param node: ndoe to match in primary
:returns: whether the node has been matched
"""
return self.match_primary(node) is not None
[docs]
def is_match_secondary(self, node: Node) -> bool:
"""
Returns true if the node in secondary has been matched with a node in primary.
:param node: ndoe to match in secondary
:returns: whether the node has been matched
"""
return self.match_secondary(node) is not None
[docs]
def to_csv(self, path: PathLike, *extra_attrs: ExtraAttrsType) -> None: # type: ignore[valid-type]
"""
Write the mapping into a csv file.
Additional attributes of the nodes to put in the csv can be optionally specified.
For example:
.. code-block:: python
:linenos:
# Adding the attribute "primary_addr" and "secondary_addr". The value will be obtained
# by accessing `function.addr`
mapping.to_csv("result.csv", "addr")
# Adding the attributes name and type. This will add the fields "primary_name",
# "secondary_name", "primary_type", "secondary_type"
mapping.to_csv("result.csv", ("name", lambda f: f.name.upper()), "type")
:param path: The file path of the csv file to write
:param extra_attrs: Additional attributes to put in the csv. Each attribute is either a
tuple (attribute_name, attribute_function) or a string *attribute_name*
"""
# Check the path
if isinstance(path, str):
path = Path(path)
if path.exists() and not path.is_file():
raise ValueError(f"path `{path}` already exists and is not a file.")
if path.exists():
logging.info(f"Overwriting file {path}")
# Extract the optional extra attributes
attrs_name = []
attrs_func = []
for extra_attr in extra_attrs:
match extra_attr:
case str(name):
attrs_name.append(f"primary_{name}")
attrs_name.append(f"secondary_{name}")
attrs_func.append(lambda f: getattr(f, name))
case (name, func):
attrs_name.append(f"primary_{name}")
attrs_name.append(f"secondary_{name}")
attrs_func.append(func)
with open(path, "w", newline="") as f:
writer = csv.writer(f)
writer.writerow(
("primary_node", "secondary_node", "similarity", "confidence", *attrs_name)
)
for match in self._matches:
# Get the extra attributes values
extra_values = []
for func in attrs_func:
extra_values.append(func(match.primary))
extra_values.append(func(match.secondary))
writer.writerow(
(
match.primary.get_label(),
match.secondary.get_label(),
match.similarity,
match.confidence,
*extra_values,
)
)