Skip to content

Commit

Permalink
release v2.1.22
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisschellekens committed Mar 19, 2024
1 parent 93d0740 commit 7d79fbc
Show file tree
Hide file tree
Showing 941 changed files with 66,532 additions and 2,145 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
[![Public Method Documentation : 100%](https://img.shields.io/badge/public%20method%20documentation-100%25-green)]()
[![Number of Tests : 751](https://img.shields.io/badge/number%20of%20tests-751-green)]()
[![Python : 3.8 | 3.9 | 3.10 ](https://img.shields.io/badge/python-3.8%20|%203.9%20|%203.10-green)]()
[![Type Checking : 98%](https://img.shields.io/badge/type%20checking-98%25-green)]()


[![Downloads](https://pepy.tech/badge/borb)](https://pepy.tech/project/borb)
[![Downloads](https://pepy.tech/badge/borb/month)](https://pepy.tech/project/borb)
Expand Down
65 changes: 0 additions & 65 deletions borb/datastructure/cache_by_hash.py

This file was deleted.

10 changes: 10 additions & 0 deletions borb/datastructure/disjoint_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ def __len__(self):
def add(self, x: typing.Any) -> "disjointset":
"""
Add an element to this disjointset
:param x: the element to be added
:return: self
"""
self._parents[x] = x
self._ranks[x] = 0
Expand All @@ -54,6 +56,8 @@ def add(self, x: typing.Any) -> "disjointset":
def find(self, x: typing.Any) -> typing.Any:
"""
Find the root of an element in this disjointset
:param x: the element for which to find the root element
:return: the root element of the given element
"""
if self._parents[x] == x:
return x
Expand All @@ -63,12 +67,15 @@ def find(self, x: typing.Any) -> typing.Any:
def pop(self, x: typing.Any) -> "disjointset":
"""
Remove an element from this disjointset
:param x: the element to be removed
:return: self
"""
raise NotImplementedError()

def sets(self) -> typing.List[typing.List[typing.Any]]:
"""
This function returns all equivalence sets in this disjointset
:return: all equivalence sets of this disjointset
"""
cluster_parents: typing.Dict[typing.Any, typing.Any] = {}
for x, _ in self._parents.items():
Expand All @@ -82,6 +89,9 @@ def union(self, x: typing.Any, y: typing.Any) -> "disjointset":
"""
Mark two elements in this disjointset as equivalent,
propagating the equivalence throughout the disjointset
:param x: the first element
:param y: the second element
:return: self
"""
x_parent = self.find(x)
y_parent = self.find(y)
Expand Down
12 changes: 10 additions & 2 deletions borb/datastructure/str_trie.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,22 @@
# -*- coding: utf-8 -*-

"""
This class represents a trie[str, typing.Any]
In computer science, a trie (/ˈtraɪ/, /ˈtriː/), also called digital tree or prefix tree, is a type of k-ary search tree,
a tree data structure used for locating specific keys from within a set. These keys are most often strings,
with links between nodes defined not by the entire key, but by individual characters.
In order to access a key (to recover its value, change it, or remove it), the trie is traversed depth-first,
following the links between nodes, which represent each character in the key.
"""
import typing


class Trie:
"""
This class represents a trie[str, typing.Any]
In computer science, a trie (/ˈtraɪ/, /ˈtriː/), also called digital tree or prefix tree, is a type of k-ary search tree,
a tree data structure used for locating specific keys from within a set. These keys are most often strings,
with links between nodes defined not by the entire key, but by individual characters.
In order to access a key (to recover its value, change it, or remove it), the trie is traversed depth-first,
following the links between nodes, which represent each character in the key.
"""

class TrieNode:
Expand Down
2 changes: 2 additions & 0 deletions borb/io/filter/ascii85_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ class ASCII85Decode:
def decode(bytes_in: bytes) -> bytes:
"""
Decodes data encoded in an ASCII base-85 representation
:param bytes_in: the input bytes
:return: the output bytes
"""
exceptions_to_throw = []

Expand Down
31 changes: 23 additions & 8 deletions borb/io/filter/flate_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
# -*- coding: utf-8 -*-

"""
(PDF 1.2) Decompresses data encoded using the zlib/deflate
compression method, reproducing the original text or binary
data.
LZW and Flate encoding compress more compactly if their input data is highly predictable.
One way of increasing the predictability of many continuous-tone sampled images is to replace each sample with the
difference between that sample and a predictor function applied to earlier neighboring samples.
If the predictor function works well, the postprediction data clusters toward 0.
PDF supports two groups of Predictor functions.
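The first, the TIFF group, consists of the single function that is Predictor 2 in the TIFF 6.0 specification.
The second, the PNG group, consists of the filters of the World Wide Web Consortium's Portable Network Graphics recommendation.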
"""
import copy
import typing
Expand All @@ -13,9 +16,12 @@

class FlateDecode:
"""
(PDF 1.2) Decompresses data encoded using the zlib/deflate
compression method, reproducing the original text or binary
data.
LZW and Flate encoding compress more compactly if their input data is highly predictable.
One way of increasing the predictability of many continuous-tone sampled images is to replace each sample with the
difference between that sample and a predictor function applied to earlier neighboring samples.
If the predictor function works well, the postprediction data clusters toward 0.
PDF supports two groups of Predictor functions.
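The first, the TIFF group, consists of the single function that is Predictor 2 in the TIFF 6.0 specification.
The second, the PNG group, consists of the filters of the World Wide Web Consortium's Portable Network Graphics recommendation.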
"""

#
Expand All @@ -38,8 +44,17 @@ def decode(
predictor: int = 1,
) -> bytes:
"""
Decompresses data encoded using the zlib/deflate
compression method
LZW and Flate encoding compress more compactly if their input data is highly predictable.
One way of increasing the predictability of many continuous-tone sampled images is to replace each sample with the
difference between that sample and a predictor function applied to earlier neighboring samples.
If the predictor function works well, the postprediction data clusters toward 0.
PDF supports two groups of Predictor functions.
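The first, the TIFF group, consists of the single function that is Predictor 2 in the TIFF 6.0 specification.
The second, the PNG group, consists of the filters of the World Wide Web Consortium's Portable Network Graphics recommendation.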
:param bytes_in: the input bytes
:param bits_per_component: the number of bits per component
:param columns: the number of columns
:param predictor: which predictor to use
:return: the output bytes
"""

# trivial case
Expand Down
15 changes: 7 additions & 8 deletions borb/io/filter/lzw_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
# -*- coding: utf-8 -*-

"""
Decompresses data encoded using the LZW (Lempel-Ziv-Welch)
adaptive compression method, reproducing the original
text or binary data.
Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method,
reproducing the original text or binary data.
"""
import typing

Expand Down Expand Up @@ -55,9 +54,8 @@ def next(self, n) -> int:

class LZWDecode:
"""
Decompresses data encoded using the LZW (Lempel-Ziv-
Welch) adaptive compression method, reproducing the original
text or binary data.
Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method,
reproducing the original text or binary data.
"""

#
Expand Down Expand Up @@ -94,8 +92,9 @@ def _init_lookup_table(self):

def decode(self, input: bytes):
"""
Decompresses data encoded using the LZW (Lempel-Ziv-Welch)
adaptive compression method
Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method
:param input: the input bytes
:return: the output bytes
"""

# output
Expand Down
5 changes: 3 additions & 2 deletions borb/io/filter/run_length_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ class RunLengthDecode:
@staticmethod
def decode(bytes_in: bytes) -> bytes:
"""
Decompresses data encoded using a byte-oriented run-length
encoding algorithm
Decompresses data encoded using a byte-oriented run-length encoding algorithm
:param bytes_in: the input bytes
:return: the output bytes
"""

# trivial case
Expand Down
5 changes: 3 additions & 2 deletions borb/io/filter/stream_decode_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@

def decode_stream(s: Stream) -> Stream:
"""
This function decodes a Stream, applying the filters specified in the Filter entry
of its stream dictionary
This function decodes a Stream, applying the filters specified in the Filter entry of its stream dictionary
:param s: the input Stream object
:return: the input Stream, modified to contain the decoded bytes
"""
# fmt: off
assert isinstance(s, Stream), "decode_stream only works on Stream objects"
Expand Down
26 changes: 20 additions & 6 deletions borb/io/read/encryption/standard_security_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def __init__(
assert len(self._o) == 32

# /ID
trailer: typing.Optional["PDFObject"] = encryption_dictionary.get_parent()
trailer: typing.Optional["PDFObject"] = encryption_dictionary.get_parent() # type: ignore[name-defined]
assert trailer is not None
assert isinstance(trailer, Dictionary)
if "ID" in trailer:
Expand All @@ -95,7 +95,7 @@ def __init__(
# (Required) A set of flags specifying which operations shall be permitted
# when the document is opened with user access (see Table 22).
assert "P" in encryption_dictionary
self._permissions: int = int(encryption_dictionary.get("P")) # type: ignore [arg-type]
self._permissions: int = int(encryption_dictionary.get("P")) # type: ignore[arg-type]

# (Optional; PDF 1.4; only if V is 2 or 3) The length of the encryption key, in bits.
# The value shall be a multiple of 8, in the range 40 to 128. Default value: 40.
Expand All @@ -113,9 +113,9 @@ def __init__(
# Streams") shall be encrypted. Conforming products should respect this
# value.
# Default value: true.
self._encrypt_metadata: bool = encryption_dictionary.get(
"EncryptMetadata", Boolean(True)
)
# fmt: off
self._encrypt_metadata: bool = encryption_dictionary.get("EncryptMetadata", Boolean(True))
# fmt: on

# verify password(s)
password: typing.Optional[bytes] = None
Expand Down Expand Up @@ -347,6 +347,8 @@ def _unescape_pdf_syntax(
def authenticate_owner_password(self, owner_password: bytes) -> bool:
"""
Algorithm 7: Authenticating the owner password
:param owner_password: the owner password
:return: True if the owner password matches, False otherwise
"""
# a) Compute an encryption key from the supplied password string, as described in steps (a) to (d) of
# "Algorithm 3: Computing the encryption dictionary’s O (owner password) value".
Expand All @@ -368,6 +370,8 @@ def authenticate_owner_password(self, owner_password: bytes) -> bool:
def authenticate_user_password(self, user_password: bytes) -> bool:
"""
Algorithm 6: Authenticating the user password
:param user_password: the user password
:return: True if the user password matches, False otherwise
"""
# a) Perform all but the last step of "Algorithm 4: Computing the encryption dictionary’s U (user password)
# value (Security handlers of revision 2)" or "Algorithm 5: Computing the encryption dictionary’s U (user
Expand All @@ -388,15 +392,25 @@ def authenticate_user_password(self, user_password: bytes) -> bool:
return self._u == u_value

def decrypt(self, object: AnyPDFType) -> AnyPDFType:
"""
This function decrypts an object inside the PDF
:param object: the object to be decrypted
:return: the decrypted object
"""
return self.encrypt(object)

def encrypt(self, object: AnyPDFType) -> AnyPDFType:
"""
This function encrypts an object inside the PDF
:param object: the object to be encrypted
:return: the encrypted object
"""
# a) Obtain the object number and generation number from the object identifier of the string or stream to be
# encrypted (see 7.3.10, "Indirect Objects"). If the string is a direct object, use the identifier of the indirect
# object containing it.
reference: typing.Optional[Reference] = object.get_reference()
if reference is None:
parent: typing.Optional["PDFObject"] = object.get_parent()
parent: typing.Optional["PDFObject"] = object.get_parent() # type: ignore[name-defined]
assert parent is not None
reference = parent.get_reference()
assert reference is not None
Expand Down
10 changes: 8 additions & 2 deletions borb/io/read/font/font_dictionary_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ def can_be_transformed(
) -> bool:
"""
This function returns True if the object to be transformed is a /Font Dictionary
:param object: the object to be transformed
:return: True if the object is a /Font Dictionary, False otherwise
"""
return (
isinstance(object, dict)
Expand All @@ -66,7 +68,12 @@ def transform(
event_listeners: typing.List[EventListener] = [],
) -> typing.Any:
"""
This function reads a /Font Dictionary from a byte stream
This function transforms a /Font Dictionary into a Font Object
:param object_to_transform: the /Font Dictionary to transform
:param parent_object: the parent Object
:param context: the ReadTransformerState (containing passwords, etc.)
:param event_listeners: the EventListener objects that may need to be notified
:return: a Font Object
"""

# convert dictionary like structure
Expand Down Expand Up @@ -106,7 +113,6 @@ def transform(
# Type 3 Font
elif subtype_name == "Type3":
font_obj = Type3Font()

elif subtype_name == "CIDFontType0":
font_obj = CIDType0Font()
elif subtype_name == "CIDFontType2":
Expand Down
Loading

0 comments on commit 7d79fbc

Please sign in to comment.