release v2.1.22

jorisschellekens · Mar 19, 2024 · 7d79fbc · 7d79fbc
1 parent 93d0740
commit 7d79fbc
Show file tree

Hide file tree

Showing 941 changed files with 66,532 additions and 2,145 deletions.
diff --git a/README.md b/README.md
@@ -8,6 +8,8 @@
 [![Public Method Documentation : 100%](https://img.shields.io/badge/public%20method%20documentation-100%25-green)]()
 [![Number of Tests : 751](https://img.shields.io/badge/number%20of%20tests-751-green)]()
 [![Python : 3.8 | 3.9 | 3.10 ](https://img.shields.io/badge/python-3.8%20&#124;%203.9%20&#124;%203.10-green)]()
+[![Type Checking : 98%](https://img.shields.io/badge/type%20checking-98%25-green)]()
+
 
 [![Downloads](https://pepy.tech/badge/borb)](https://pepy.tech/project/borb)
 [![Downloads](https://pepy.tech/badge/borb/month)](https://pepy.tech/project/borb)

diff --git a/borb/datastructure/cache_by_hash.py b/borb/datastructure/cache_by_hash.py
diff --git a/borb/datastructure/disjoint_set.py b/borb/datastructure/disjoint_set.py
@@ -46,6 +46,8 @@ def __len__(self):
     def add(self, x: typing.Any) -> "disjointset":
         """
         Add an element to this disjointset
+        :param x:   the element to be added
+        :return:    self
         """
         self._parents[x] = x
         self._ranks[x] = 0
@@ -54,6 +56,8 @@ def add(self, x: typing.Any) -> "disjointset":
     def find(self, x: typing.Any) -> typing.Any:
         """
         Find the root of an element in this disjointset
+        :param x:   the element for which to find the root element
+        :return:    the root element of the given element
         """
         if self._parents[x] == x:
             return x
@@ -63,12 +67,15 @@ def find(self, x: typing.Any) -> typing.Any:
     def pop(self, x: typing.Any) -> "disjointset":
         """
         Remove an element from this disjointset
+        :param x:   the element to be removed
+        :return:    self
         """
         raise NotImplementedError()
 
     def sets(self) -> typing.List[typing.List[typing.Any]]:
         """
         This function returns all equivalence sets in this disjointset
+        :return:    all equivalence sets of this disjointset
         """
         cluster_parents: typing.Dict[typing.Any, typing.Any] = {}
         for x, _ in self._parents.items():
@@ -82,6 +89,9 @@ def union(self, x: typing.Any, y: typing.Any) -> "disjointset":
         """
         Mark two elements in this disjointset as equivalent,
         propagating the equivalence throughout the disjointset
+        :param x:   the first element
+        :param y:   the second element
+        :return:    self
         """
         x_parent = self.find(x)
         y_parent = self.find(y)

diff --git a/borb/datastructure/str_trie.py b/borb/datastructure/str_trie.py
@@ -2,14 +2,22 @@
 # -*- coding: utf-8 -*-
 
 """
-This class represents a trie[str, typing.Any]
+In computer science, a trie (/ˈtraɪ/, /ˈtriː/), also called digital tree or prefix tree, is a type of k-ary search tree,
+a tree data structure used for locating specific keys from within a set. These keys are most often strings,
+with links between nodes defined not by the entire key, but by individual characters.
+In order to access a key (to recover its value, change it, or remove it), the trie is traversed depth-first,
+following the links between nodes, which represent each character in the key.
 """
 import typing
 
 
 class Trie:
     """
-    This class represents a trie[str, typing.Any]
+    In computer science, a trie (/ˈtraɪ/, /ˈtriː/), also called digital tree or prefix tree, is a type of k-ary search tree,
+    a tree data structure used for locating specific keys from within a set. These keys are most often strings,
+    with links between nodes defined not by the entire key, but by individual characters.
+    In order to access a key (to recover its value, change it, or remove it), the trie is traversed depth-first,
+    following the links between nodes, which represent each character in the key.
     """
 
     class TrieNode:

diff --git a/borb/io/filter/ascii85_decode.py b/borb/io/filter/ascii85_decode.py
@@ -31,6 +31,8 @@ class ASCII85Decode:
     def decode(bytes_in: bytes) -> bytes:
         """
         Decodes data encoded in an ASCII base-85 representation
+        :param bytes_in:    the input bytes
+        :return:            the output bytes
         """
         exceptions_to_throw = []
 

diff --git a/borb/io/filter/flate_decode.py b/borb/io/filter/flate_decode.py
@@ -2,9 +2,12 @@
 # -*- coding: utf-8 -*-
 
 """
-(PDF 1.2) Decompresses data encoded using the zlib/deflate
-compression method, reproducing the original text or binary
-data.
+(PDF 1.2) Decompresses data encoded using the zlib/deflate compression method, reproducing the original text or binary data.
+LZW and Flate encoding compress more compactly if their input data is highly predictable.
+One way of increasing the predictability of many continuous-tone sampled images is to replace each sample with the
+difference between that sample and a predictor function applied to earlier neighboring samples.
+If the predictor function works well, the postprediction data clusters toward 0. PDF supports two groups of Predictor
+functions: the TIFF group (the single function that is Predictor 2 in the TIFF 6.0 specification) and the PNG group.
 """
 import copy
 import typing
@@ -13,9 +16,12 @@
 
 class FlateDecode:
     """
-    (PDF 1.2) Decompresses data encoded using the zlib/deflate
-    compression method, reproducing the original text or binary
-    data.
+    (PDF 1.2) Decompresses data encoded using the zlib/deflate compression method, reproducing the original text or binary data.
+    LZW and Flate encoding compress more compactly if their input data is highly predictable.
+    One way of increasing the predictability of many continuous-tone sampled images is to replace each sample with the
+    difference between that sample and a predictor function applied to earlier neighboring samples.
+    If the predictor function works well, the postprediction data clusters toward 0. PDF supports two groups of Predictor
+    functions: the TIFF group (the single function that is Predictor 2 in the TIFF 6.0 specification) and the PNG group.
     """
 
     #
@@ -38,8 +44,17 @@ def decode(
         predictor: int = 1,
     ) -> bytes:
         """
-        Decompresses data encoded using the zlib/deflate
-        compression method
+        Decompresses data encoded using the zlib/deflate compression method, optionally undoing a predictor function.
+        LZW and Flate encoding compress more compactly if their input data is highly predictable.
+        One way of increasing the predictability of many continuous-tone sampled images is to replace each sample with the
+        difference between that sample and a predictor function applied to earlier neighboring samples.
+        If the predictor function works well, the postprediction data clusters toward 0. PDF supports two groups of Predictor
+        functions: the TIFF group (the single function that is Predictor 2 in the TIFF 6.0 specification) and the PNG group.
+        :param bytes_in:            the input bytes
+        :param bits_per_component:  the number of bits per component
+        :param columns:             the number of columns
+        :param predictor:           which predictor to use
+        :return:                    the output bytes
         """
 
         # trivial case

diff --git a/borb/io/filter/lzw_decode.py b/borb/io/filter/lzw_decode.py
@@ -2,9 +2,8 @@
 # -*- coding: utf-8 -*-
 
 """
-Decompresses data encoded using the LZW (Lempel-Ziv-Welch)
-adaptive compression method, reproducing the original
-text or binary data.
+Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method,
+reproducing the original text or binary data.
 """
 import typing
 
@@ -55,9 +54,8 @@ def next(self, n) -> int:
 
 class LZWDecode:
     """
-    Decompresses data encoded using the LZW (Lempel-Ziv-
-    Welch) adaptive compression method, reproducing the original
-    text or binary data.
+    Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method,
+    reproducing the original text or binary data.
     """
 
     #
@@ -94,8 +92,9 @@ def _init_lookup_table(self):
 
     def decode(self, input: bytes):
         """
-        Decompresses data encoded using the LZW (Lempel-Ziv-Welch)
-        adaptive compression method
+        Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method
+        :param input:   the input bytes
+        :return:        the output bytes
         """
 
         # output

diff --git a/borb/io/filter/run_length_decode.py b/borb/io/filter/run_length_decode.py
@@ -35,8 +35,9 @@ class RunLengthDecode:
     @staticmethod
     def decode(bytes_in: bytes) -> bytes:
         """
-        Decompresses data encoded using a byte-oriented run-length
-        encoding algorithm
+        Decompresses data encoded using a byte-oriented run-length encoding algorithm
+        :param bytes_in:    the input bytes
+        :return:            the output bytes
         """
 
         # trivial case

diff --git a/borb/io/filter/stream_decode_util.py b/borb/io/filter/stream_decode_util.py
@@ -20,8 +20,9 @@
 
 def decode_stream(s: Stream) -> Stream:
     """
-    This function decodes a Stream, applying the filters specified in the Filter entry
-    of its stream dictionary
+    This function decodes a Stream, applying the filters specified in the Filter entry of its stream dictionary
+    :param s:   the input Stream object
+    :return:    the input Stream, modified to contain the decoded bytes
     """
     # fmt: off
     assert isinstance(s, Stream), "decode_stream only works on Stream objects"

diff --git a/borb/io/read/encryption/standard_security_handler.py b/borb/io/read/encryption/standard_security_handler.py
@@ -86,7 +86,7 @@ def __init__(
         assert len(self._o) == 32
 
         # /ID
-        trailer: typing.Optional["PDFObject"] = encryption_dictionary.get_parent()
+        trailer: typing.Optional["PDFObject"] = encryption_dictionary.get_parent()  # type: ignore[name-defined]
         assert trailer is not None
         assert isinstance(trailer, Dictionary)
         if "ID" in trailer:
@@ -95,7 +95,7 @@ def __init__(
         # (Required) A set of flags specifying which operations shall be permitted
         # when the document is opened with user access (see Table 22).
         assert "P" in encryption_dictionary
-        self._permissions: int = int(encryption_dictionary.get("P"))  # type: ignore [arg-type]
+        self._permissions: int = int(encryption_dictionary.get("P"))  # type: ignore[arg-type]
 
         # (Optional; PDF 1.4; only if V is 2 or 3) The length of the encryption key, in bits.
         # The value shall be a multiple of 8, in the range 40 to 128. Default value: 40.
@@ -113,9 +113,9 @@ def __init__(
         # Streams") shall be encrypted. Conforming products should respect this
         # value.
         # Default value: true.
-        self._encrypt_metadata: bool = encryption_dictionary.get(
-            "EncryptMetadata", Boolean(True)
-        )
+        # fmt: off
+        self._encrypt_metadata: bool = encryption_dictionary.get("EncryptMetadata", Boolean(True))
+        # fmt: on
 
         # verify password(s)
         password: typing.Optional[bytes] = None
@@ -347,6 +347,8 @@ def _unescape_pdf_syntax(
     def authenticate_owner_password(self, owner_password: bytes) -> bool:
         """
         Algorithm 7: Authenticating the owner password
+        :param owner_password:  the owner password
+        :return:                True if the owner password matches, False otherwise
         """
         # a) Compute an encryption key from the supplied password string, as described in steps (a) to (d) of
         # "Algorithm 3: Computing the encryption dictionary’s O (owner password) value".
@@ -368,6 +370,8 @@ def authenticate_owner_password(self, owner_password: bytes) -> bool:
     def authenticate_user_password(self, user_password: bytes) -> bool:
         """
         Algorithm 6: Authenticating the user password
+        :param user_password:   the user password
+        :return:                True if the user password matches, False otherwise
         """
         # a) Perform all but the last step of "Algorithm 4: Computing the encryption dictionary’s U (user password)
         # value (Security handlers of revision 2)" or "Algorithm 5: Computing the encryption dictionary’s U (user
@@ -388,15 +392,25 @@ def authenticate_user_password(self, user_password: bytes) -> bool:
         return self._u == u_value
 
     def decrypt(self, object: AnyPDFType) -> AnyPDFType:
+        """
+        This function decrypts an object inside the PDF (it delegates to the encrypt method)
+        :param object:  the object to be decrypted
+        :return:        the decrypted object
+        """
         return self.encrypt(object)
 
     def encrypt(self, object: AnyPDFType) -> AnyPDFType:
+        """
+        This function encrypts an object inside the PDF
+        :param object:  the object to be encrypted
+        :return:        the encrypted object
+        """
         # a) Obtain the object number and generation number from the object identifier of the string or stream to be
         # encrypted (see 7.3.10, "Indirect Objects"). If the string is a direct object, use the identifier of the indirect
         # object containing it.
         reference: typing.Optional[Reference] = object.get_reference()
         if reference is None:
-            parent: typing.Optional["PDFObject"] = object.get_parent()
+            parent: typing.Optional["PDFObject"] = object.get_parent()  # type: ignore[name-defined]
             assert parent is not None
             reference = parent.get_reference()
         assert reference is not None

diff --git a/borb/io/read/font/font_dictionary_transformer.py b/borb/io/read/font/font_dictionary_transformer.py
@@ -50,6 +50,8 @@ def can_be_transformed(
     ) -> bool:
         """
         This function returns True if the object to be transformed is a /Font Dictionary
+        :param object:  the object to be transformed
+        :return:        True if the object is a /Font Dictionary, False otherwise
         """
         return (
             isinstance(object, dict)
@@ -66,7 +68,12 @@ def transform(
         event_listeners: typing.List[EventListener] = [],
     ) -> typing.Any:
         """
-        This function reads a /Font Dictionary from a byte stream
+        This function transforms a /Font Dictionary into a Font Object
+        :param object_to_transform:     the /Font Dictionary to transform
+        :param parent_object:           the parent Object
+        :param context:                 the ReadTransformerState (containing passwords, etc)
+        :param event_listeners:         the EventListener objects that may need to be notified
+        :return:                        a Font Object
         """
 
         # convert dictionary like structure
@@ -106,7 +113,6 @@ def transform(
         # Type 3 Font
         elif subtype_name == "Type3":
             font_obj = Type3Font()
-
         elif subtype_name == "CIDFontType0":
             font_obj = CIDType0Font()
         elif subtype_name == "CIDFontType2":