From 92f0be9aab12225785789817a965a5e10803231b Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 31 May 2022 10:21:55 +0100 Subject: [PATCH 1/5] Fix thumbnail memory leaks --- synapse/rest/media/v1/media_repository.py | 265 ++++++++++++---------- synapse/rest/media/v1/thumbnailer.py | 62 ++++- 2 files changed, 192 insertions(+), 135 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 20af36653811..a551458a9fef 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -587,15 +587,16 @@ async def generate_local_exact_thumbnail( ) return None - t_byte_source = await defer_to_thread( - self.hs.get_reactor(), - self._generate_thumbnail, - thumbnailer, - t_width, - t_height, - t_method, - t_type, - ) + with thumbnailer: + t_byte_source = await defer_to_thread( + self.hs.get_reactor(), + self._generate_thumbnail, + thumbnailer, + t_width, + t_height, + t_method, + t_type, + ) if t_byte_source: try: @@ -657,15 +658,16 @@ async def generate_remote_exact_thumbnail( ) return None - t_byte_source = await defer_to_thread( - self.hs.get_reactor(), - self._generate_thumbnail, - thumbnailer, - t_width, - t_height, - t_method, - t_type, - ) + with thumbnailer: + t_byte_source = await defer_to_thread( + self.hs.get_reactor(), + self._generate_thumbnail, + thumbnailer, + t_width, + t_height, + t_method, + t_type, + ) if t_byte_source: try: @@ -749,119 +751,134 @@ async def _generate_thumbnails( ) return None - m_width = thumbnailer.width - m_height = thumbnailer.height - - if m_width * m_height >= self.max_image_pixels: - logger.info( - "Image too large to thumbnail %r x %r > %r", - m_width, - m_height, - self.max_image_pixels, - ) - return None - - if thumbnailer.transpose_method is not None: - m_width, m_height = await defer_to_thread( - self.hs.get_reactor(), thumbnailer.transpose - ) + with thumbnailer: + m_width = thumbnailer.width + m_height = thumbnailer.height - # We deduplicate the thumbnail sizes by ignoring the cropped versions if - # they have the same dimensions of a scaled one. - thumbnails: Dict[Tuple[int, int, str], str] = {} - for requirement in requirements: - if requirement.method == "crop": - thumbnails.setdefault( - (requirement.width, requirement.height, requirement.media_type), - requirement.method, - ) - elif requirement.method == "scale": - t_width, t_height = thumbnailer.aspect( - requirement.width, requirement.height + if m_width * m_height >= self.max_image_pixels: + logger.info( + "Image too large to thumbnail %r x %r > %r", + m_width, + m_height, + self.max_image_pixels, ) - t_width = min(m_width, t_width) - t_height = min(m_height, t_height) - thumbnails[ - (t_width, t_height, requirement.media_type) - ] = requirement.method - - # Now we generate the thumbnails for each dimension, store it - for (t_width, t_height, t_type), t_method in thumbnails.items(): - # Generate the thumbnail - if t_method == "crop": - t_byte_source = await defer_to_thread( - self.hs.get_reactor(), thumbnailer.crop, t_width, t_height, t_type + return None + + if thumbnailer.transpose_method is not None: + m_width, m_height = await defer_to_thread( + self.hs.get_reactor(), thumbnailer.transpose ) - elif t_method == "scale": - t_byte_source = await defer_to_thread( - self.hs.get_reactor(), thumbnailer.scale, t_width, t_height, t_type + + # We deduplicate the thumbnail sizes by ignoring the cropped versions if + # they have the same dimensions of a scaled one. + thumbnails: Dict[Tuple[int, int, str], str] = {} + for requirement in requirements: + if requirement.method == "crop": + thumbnails.setdefault( + (requirement.width, requirement.height, requirement.media_type), + requirement.method, + ) + elif requirement.method == "scale": + t_width, t_height = thumbnailer.aspect( + requirement.width, requirement.height + ) + t_width = min(m_width, t_width) + t_height = min(m_height, t_height) + thumbnails[ + (t_width, t_height, requirement.media_type) + ] = requirement.method + + # Now we generate the thumbnails for each dimension, store it + for (t_width, t_height, t_type), t_method in thumbnails.items(): + # Generate the thumbnail + if t_method == "crop": + t_byte_source = await defer_to_thread( + self.hs.get_reactor(), + thumbnailer.crop, + t_width, + t_height, + t_type, + ) + elif t_method == "scale": + t_byte_source = await defer_to_thread( + self.hs.get_reactor(), + thumbnailer.scale, + t_width, + t_height, + t_type, + ) + else: + logger.error("Unrecognized method: %r", t_method) + continue + + if not t_byte_source: + continue + + file_info = FileInfo( + server_name=server_name, + file_id=file_id, + url_cache=url_cache, + thumbnail=ThumbnailInfo( + width=t_width, + height=t_height, + method=t_method, + type=t_type, + ), ) - else: - logger.error("Unrecognized method: %r", t_method) - continue - - if not t_byte_source: - continue - - file_info = FileInfo( - server_name=server_name, - file_id=file_id, - url_cache=url_cache, - thumbnail=ThumbnailInfo( - width=t_width, - height=t_height, - method=t_method, - type=t_type, - ), - ) - with self.media_storage.store_into_file(file_info) as (f, fname, finish): - try: - await self.media_storage.write_to_file(t_byte_source, f) - await finish() - finally: - t_byte_source.close() - - t_len = os.path.getsize(fname) - - # Write to database - if server_name: - # Multiple remote media download requests can race (when - # using multiple media repos), so this may throw a violation - # constraint exception. If it does we'll delete the newly - # generated thumbnail from disk (as we're in the ctx - # manager). - # - # However: we've already called `finish()` so we may have - # also written to the storage providers. This is preferable - # to the alternative where we call `finish()` *after* this, - # where we could end up having an entry in the DB but fail - # to write the files to the storage providers. + with self.media_storage.store_into_file(file_info) as ( + f, + fname, + finish, + ): try: - await self.store.store_remote_media_thumbnail( - server_name, - media_id, - file_id, - t_width, - t_height, - t_type, - t_method, - t_len, - ) - except Exception as e: - thumbnail_exists = await self.store.get_remote_media_thumbnail( - server_name, - media_id, - t_width, - t_height, - t_type, + await self.media_storage.write_to_file(t_byte_source, f) + await finish() + finally: + t_byte_source.close() + + t_len = os.path.getsize(fname) + + # Write to database + if server_name: + # Multiple remote media download requests can race (when + # using multiple media repos), so this may throw a violation + # constraint exception. If it does we'll delete the newly + # generated thumbnail from disk (as we're in the ctx + # manager). + # + # However: we've already called `finish()` so we may have + # also written to the storage providers. This is preferable + # to the alternative where we call `finish()` *after* this, + # where we could end up having an entry in the DB but fail + # to write the files to the storage providers. + try: + await self.store.store_remote_media_thumbnail( + server_name, + media_id, + file_id, + t_width, + t_height, + t_type, + t_method, + t_len, + ) + except Exception as e: + thumbnail_exists = ( + await self.store.get_remote_media_thumbnail( + server_name, + media_id, + t_width, + t_height, + t_type, + ) + ) + if not thumbnail_exists: + raise e + else: + await self.store.store_local_thumbnail( + media_id, t_width, t_height, t_type, t_method, t_len ) - if not thumbnail_exists: - raise e - else: - await self.store.store_local_thumbnail( - media_id, t_width, t_height, t_type, t_method, t_len - ) return {"width": m_width, "height": m_height} diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index 390491eb83cc..81c0ce0f91c5 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -14,7 +14,8 @@ # limitations under the License. import logging from io import BytesIO -from typing import Tuple +from types import TracebackType +from typing import Optional, Tuple, Type from PIL import Image @@ -45,6 +46,9 @@ def set_limits(max_image_pixels: int) -> None: Image.MAX_IMAGE_PIXELS = max_image_pixels def __init__(self, input_path: str): + # Have we closed the image? + self._closed = False + try: self.image = Image.open(input_path) except OSError as e: @@ -79,6 +83,8 @@ def __init__(self, input_path: str): # A lot of parsing errors can happen when parsing EXIF logger.info("Error parsing image EXIF information: %s", e) + self.close() + def transpose(self) -> Tuple[int, int]: """Transpose the image using its EXIF Orientation tag @@ -89,7 +95,8 @@ def transpose(self) -> Tuple[int, int]: # Safety: `transpose` takes an int rather than e.g. an IntEnum. # self.transpose_method is set above to be a value in # EXIF_TRANSPOSE_MAPPINGS, and that only contains correct values. - self.image = self.image.transpose(self.transpose_method) # type: ignore[arg-type] + with self.image: + self.image = self.image.transpose(self.transpose_method) # type: ignore[arg-type] self.width, self.height = self.image.size self.transpose_method = None # We don't need EXIF any more @@ -122,9 +129,11 @@ def _resize(self, width: int, height: int) -> Image.Image: # If the image has transparency, use RGBA instead. if self.image.mode in ["1", "L", "P"]: if self.image.info.get("transparency", None) is not None: - self.image = self.image.convert("RGBA") + with self.image: + self.image = self.image.convert("RGBA") else: - self.image = self.image.convert("RGB") + with self.image: + self.image = self.image.convert("RGB") return self.image.resize((width, height), Image.ANTIALIAS) def scale(self, width: int, height: int, output_type: str) -> BytesIO: @@ -133,8 +142,8 @@ def scale(self, width: int, height: int, output_type: str) -> BytesIO: Returns: BytesIO: the bytes of the encoded image ready to be written to disk """ - scaled = self._resize(width, height) - return self._encode_image(scaled, output_type) + with self._resize(width, height) as scaled: + return self._encode_image(scaled, output_type) def crop(self, width: int, height: int, output_type: str) -> BytesIO: """Rescales and crops the image to the given dimensions preserving @@ -151,18 +160,21 @@ def crop(self, width: int, height: int, output_type: str) -> BytesIO: BytesIO: the bytes of the encoded image ready to be written to disk """ if width * self.height > height * self.width: + scaled_width = width scaled_height = (width * self.height) // self.width - scaled_image = self._resize(width, scaled_height) crop_top = (scaled_height - height) // 2 crop_bottom = height + crop_top - cropped = scaled_image.crop((0, crop_top, width, crop_bottom)) + crop = (0, crop_top, width, crop_bottom) else: scaled_width = (height * self.width) // self.height - scaled_image = self._resize(scaled_width, height) + scaled_height = height crop_left = (scaled_width - width) // 2 crop_right = width + crop_left - cropped = scaled_image.crop((crop_left, 0, crop_right, height)) - return self._encode_image(cropped, output_type) + crop = (crop_left, 0, crop_right, height) + + with self._resize(scaled_width, scaled_height) as scaled_image: + with scaled_image.crop(crop) as cropped: + return self._encode_image(cropped, output_type) def _encode_image(self, output_image: Image.Image, output_type: str) -> BytesIO: output_bytes_io = BytesIO() @@ -171,3 +183,31 @@ def _encode_image(self, output_image: Image.Image, output_type: str) -> BytesIO: output_image = output_image.convert("RGB") output_image.save(output_bytes_io, fmt, quality=80) return output_bytes_io + + def close(self) -> None: + if self._closed: + return + self._closed = True + + # Since we run this on the finalizer then we need to handle `__init__` + # raising an exception before it can define `self.image`. + image = getattr(self, "image", None) + if image is None: + return + + image.close() + + def __enter__(self) -> "Thumbnailer": + return self + + def __exit__( + self, + type: Optional[Type[BaseException]], + value: Optional[BaseException], + traceback: Optional[TracebackType], + ) -> None: + self.close() + + def __del__(self) -> None: + # Make sure we actually do close the image, rather than leak data. + self.close() From f6e41af067e9195a4c126f691c7501fa57cfb05e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 1 Jun 2022 10:05:14 +0100 Subject: [PATCH 2/5] Newsfile --- changelog.d/12932.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/12932.bugfix diff --git a/changelog.d/12932.bugfix b/changelog.d/12932.bugfix new file mode 100644 index 000000000000..506f92b42773 --- /dev/null +++ b/changelog.d/12932.bugfix @@ -0,0 +1 @@ +Fix potential memory leak when generating thumbnails. From 3d59ec390ab634006aaf2b4d8a155d2dcee26f48 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 1 Jun 2022 10:44:39 +0100 Subject: [PATCH 3/5] Don't close if we fail to parse exif --- synapse/rest/media/v1/thumbnailer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index 81c0ce0f91c5..3e7a73e377cb 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -83,8 +83,6 @@ def __init__(self, input_path: str): # A lot of parsing errors can happen when parsing EXIF logger.info("Error parsing image EXIF information: %s", e) - self.close() - def transpose(self) -> Tuple[int, int]: """Transpose the image using its EXIF Orientation tag From 48332d3d56fddf44dc01c5442c9c7c8ecaeb382e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 1 Jun 2022 10:44:44 +0100 Subject: [PATCH 4/5] Docstrings --- synapse/rest/media/v1/thumbnailer.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/synapse/rest/media/v1/thumbnailer.py b/synapse/rest/media/v1/thumbnailer.py index 3e7a73e377cb..9b93b9b4f6eb 100644 --- a/synapse/rest/media/v1/thumbnailer.py +++ b/synapse/rest/media/v1/thumbnailer.py @@ -183,8 +183,16 @@ def _encode_image(self, output_image: Image.Image, output_type: str) -> BytesIO: return output_bytes_io def close(self) -> None: + """Closes the underlying image file. + + Once closed no other functions can be called. + + Can be called multiple times. + """ + if self._closed: return + self._closed = True # Since we run this on the finalizer then we need to handle `__init__` @@ -196,6 +204,9 @@ def close(self) -> None: image.close() def __enter__(self) -> "Thumbnailer": + """Make `Thumbnailer` a context manager that calls `close` on + `__exit__`. + """ return self def __exit__( From 1eb3fd47cb1196fccf98d39185ebe98dfd693bb3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 1 Jun 2022 11:32:23 +0100 Subject: [PATCH 5/5] Empty commit