Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions fastdeploy/input/ernie4_5_vl_processor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,13 @@

from .ernie4_5_vl_processor import Ernie4_5_VLProcessor
from .process import DataProcessor, fancy_print
from .process_video import read_video_decord
from .process_video import read_video_paddlecodec

This comment was marked as outdated.

from .utils.video_utils import VideoReaderWrapper

__all__ = [
"DataProcessor",
"fancy_print",
"VideoReaderWrapper",
"read_video_decord",
"read_video_paddlecodec",
"Ernie4_5_VLProcessor",
]
6 changes: 3 additions & 3 deletions fastdeploy/input/ernie4_5_vl_processor/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from fastdeploy.utils import data_processor_logger

from .image_preprocessor.image_preprocessor_adaptive import AdaptiveImageProcessor
from .process_video import read_frames_decord, read_video_decord
from .process_video import read_frames_paddlecodec, read_video_paddlecodec
from .utils.render_timestamp import render_frame_timestamp


Expand Down Expand Up @@ -630,7 +630,7 @@ def _extract_labels(self, outputs: Dict, tgts: List[str]) -> None:
outputs["labels"] = labels

def _load_and_process_video(self, url: str, item: Dict) -> List[Image.Image]:
reader, meta, path = read_video_decord(url, save_to_disk=False)
reader, meta, path = read_video_paddlecodec(url, save_to_disk=False)

video_frame_args = dict()
video_frame_args["fps"] = item.get("fps", self.fps)
Expand All @@ -641,7 +641,7 @@ def _load_and_process_video(self, url: str, item: Dict) -> List[Image.Image]:

video_frame_args = self._set_video_frame_args(video_frame_args, meta)

frames_data, _, timestamps = read_frames_decord(
frames_data, _, timestamps = read_frames_paddlecodec(
path,
reader,
meta,
Expand Down
8 changes: 4 additions & 4 deletions fastdeploy/input/ernie4_5_vl_processor/process_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
from .utils.video_utils import VideoReaderWrapper


def read_video_decord(video_path, save_to_disk):
"""get reader and meta by decord"""
def read_video_paddlecodec(video_path, save_to_disk):

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Bug Ernie 这条读取链路同样沿用了 bytes -> io.BytesIO 的旧 decord 适配方式;切到 paddlecodec 后,非 GIF 视频字节会作为 BytesIO 传进 VideoDecoder,默认 0.1 后端无法识别。

建议修复方式:这里直接保留原始 bytes 传给 VideoReaderWrapper,并把 GIF/非 GIF 的 bytes 处理统一放到 wrapper 中;修复后补一个非 GIF bytes 输入的测试,fake decoder 需要校验收到的是 bytes 或真实临时路径,而不是 BytesIO。

"""get reader and meta by paddlecodec"""
# video_path = get_downloadable(video_path, save_to_disk=save_to_disk)
if isinstance(video_path, VideoReaderWrapper):
video_reader = video_path
Expand Down Expand Up @@ -113,7 +113,7 @@ def get_frame_indices(
return frame_indices


def read_frames_decord(
def read_frames_paddlecodec(
video_path,
video_reader,
video_meta,
Expand All @@ -126,7 +126,7 @@ def read_frames_decord(
frame_indices=None,
tol=10,
):
"""get frames by decord"""
"""get frames by paddlecodec"""

if frame_indices is None:
frame_indices = get_frame_indices(
Expand Down
82 changes: 68 additions & 14 deletions fastdeploy/input/ernie4_5_vl_processor/utils/video_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@
import os
from tempfile import NamedTemporaryFile as ntf

import decord
import numpy as np
import paddle

from fastdeploy.utils import get_logger

try:
# moviepy 1.0
Expand All @@ -27,6 +30,8 @@
# moviepy 2.0
import moviepy as mp

logger = get_logger("video_utils")


def is_gif(data: bytes) -> bool:
"""
Expand All @@ -35,19 +40,24 @@ def is_gif(data: bytes) -> bool:
return data[:6] in (b"GIF87a", b"GIF89a")


class VideoReaderWrapper(decord.VideoReader):
"""
Solving memory leak bug
class _NumpyFrame:
    """Adapter that exposes an already-decoded array through ``asnumpy()``.

    Indexing the video reader returns one of these so that existing call
    sites of the form ``reader[idx].asnumpy()`` keep working with paddlecodec.
    """

    def __init__(self, array):
        # Stored by reference; no copy or conversion happens here.
        self._array = array

    def asnumpy(self):
        """Return the wrapped array object unchanged."""
        return self._array

https://github.com/dmlc/decord/issues/208
"""

class VideoReaderWrapper:
"""paddlecodec VideoDecoder wrapper with GIF support."""

def __init__(self, video_path, *args, **kwargs):
with ntf(delete=True, suffix=".gif") as gif_file:
gif_input = None
self.original_file = None
if isinstance(video_path, str):
self.original_file = video_path
if video_path.lower().endswith(".gif"):
gif_input = video_path
elif isinstance(video_path, bytes):
Expand All @@ -70,14 +80,58 @@ def __init__(self, video_path, *args, **kwargs):
video_path = mp4_file.name
self.original_file = video_path

super().__init__(video_path, *args, **kwargs)
self.seek(0)
with paddle.use_compat_guard(enable=True, scope={"torchcodec"}):
try:
import sys

from torchcodec.decoders import VideoDecoder

sys.modules["torchcodec"] = None

This comment was marked as outdated.

except (ImportError, RuntimeError) as e:
logger.error(
f"Failed to load 'torchcodec' backend via Paddle proxy.\n"
f" - Common Causes:\n"
f" 1. Conflict with official 'torch' or 'torchcodec' packages.\n"
f" 2. Missing FFmpeg libraries or System library mismatch (CXXABI).\n"
f" - Recommended Fix Steps:\n"
f" 1. Install dependencies: `conda install ffmpeg -c conda-forge` or `apt-get update && apt-get install ffmpeg` \n"
f" 2. Uninstall conflicts: `pip uninstall torchcodec paddlecodec -y`\n"
f" 3. Reinstall packages: `pip install paddlecodec --force-reinstall`\n"
f" - If you encounter 'CXXABI' or 'libstdc++' errors, your system libraries might be outdated.\n"
f" Try prioritizing Conda libraries by running: `LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH python your_script.py`\n"
f" - Original Error: {e}"
)
raise
PADDLECODEC_NUM_THREADS = int(os.environ.get("PADDLECODEC_NUM_THREADS", 0))
self._decoder = VideoDecoder(
video_path,
seek_mode="exact",

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Bug Ernie 专用 wrapper 也传入了 seek_mode,而默认依赖固定到 paddlecodec==0.1.0 时该参数不可用;read_video_paddlecodec() 会在创建 VideoDecoder 时直接失败。

建议修复方式:和共享 fastdeploy.input.video_utils 保持一致,删除该参数,或同步提升并固定所有 requirements 中的 paddlecodec 版本到支持 seek_mode 的版本,同时让单测使用会拒绝未知 kwargs 的 fake decoder。

num_ffmpeg_threads=PADDLECODEC_NUM_THREADS,
device=kwargs.get("device", "cpu"),
dimension_order="NHWC",
)

def __len__(self):
    """Return the frame count reported by the decoder's stream metadata."""
    # NOTE(review): presumably ``metadata.num_frames`` is always an int for
    # the decoder built in __init__ -- confirm, since ``len()`` raises
    # TypeError for any non-int result.
    return self._decoder.metadata.num_frames

def __getitem__(self, key):
frames = super().__getitem__(key)
self.seek(0)
return frames
if isinstance(key, (int, np.integer)):
frame = self._decoder.get_frames_at(indices=[int(key)]).data[0]
return _NumpyFrame(frame.numpy())
if isinstance(key, slice):
indices = list(range(*key.indices(len(self))))
else:
indices = list(key) if not isinstance(key, list) else key
frames = self._decoder.get_frames_at(indices=indices).data
return _NumpyFrame(frames.numpy())

def get_avg_fps(self):
    """Return the average frame rate reported by the decoder's stream metadata."""
    return self._decoder.metadata.average_fps

def __del__(self):
if self.original_file and os.path.exists(self.original_file):
os.remove(self.original_file)
original_file = getattr(self, "original_file", None)
if original_file and os.path.exists(original_file):
try:
os.remove(original_file)
except OSError:
pass
4 changes: 2 additions & 2 deletions fastdeploy/input/paddleocr_vl_processor/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from fastdeploy.entrypoints.chat_utils import parse_chat_messages
from fastdeploy.input.mm_data_processor import MMBaseDataProcessor
from fastdeploy.input.utils import IDS_TYPE_FLAG
from fastdeploy.input.video_utils import read_video_decord
from fastdeploy.input.video_utils import read_video_paddlecodec
from fastdeploy.input.video_utils import sample_frames_paddleocr as sample_frames
from fastdeploy.multimodal.hasher import MultimodalHasher
from fastdeploy.utils import data_processor_logger
Expand Down Expand Up @@ -530,7 +530,7 @@ def _load_and_process_video(self, url: str, item: Dict) -> Tuple[np.ndarray, Dic
- frames: Processed video frames as numpy array
- metadata: Updated video metadata dictionary
"""
reader, meta, _ = read_video_decord(url, save_to_disk=False)
reader, meta, _ = read_video_paddlecodec(url, save_to_disk=False)

# Apply frame sampling if fps or target_frames specified
fps = item.get("fps", self.fps)
Expand Down
4 changes: 2 additions & 2 deletions fastdeploy/input/qwen3_vl_processor/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from fastdeploy.entrypoints.chat_utils import parse_chat_messages
from fastdeploy.input.mm_data_processor import MMBaseDataProcessor
from fastdeploy.input.utils import IDS_TYPE_FLAG
from fastdeploy.input.video_utils import read_video_decord
from fastdeploy.input.video_utils import read_video_paddlecodec
from fastdeploy.input.video_utils import sample_frames_qwen as sample_frames
from fastdeploy.multimodal.hasher import MultimodalHasher
from fastdeploy.utils import data_processor_logger
Expand Down Expand Up @@ -681,7 +681,7 @@ def _load_and_process_video(self, url: str, item: Dict) -> Tuple[np.ndarray, Dic
- frames: Processed video frames as numpy array
- metadata: Updated video metadata dictionary
"""
reader, meta, _ = read_video_decord(url, save_to_disk=False)
reader, meta, _ = read_video_paddlecodec(url, save_to_disk=False)

# Apply frame sampling if fps or target_frames specified
fps = item.get("fps", self.fps)
Expand Down
4 changes: 2 additions & 2 deletions fastdeploy/input/qwen_vl_processor/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from fastdeploy.entrypoints.chat_utils import parse_chat_messages
from fastdeploy.input.mm_data_processor import MMBaseDataProcessor
from fastdeploy.input.utils import IDS_TYPE_FLAG
from fastdeploy.input.video_utils import read_video_decord
from fastdeploy.input.video_utils import read_video_paddlecodec
from fastdeploy.input.video_utils import sample_frames_qwen as sample_frames
from fastdeploy.multimodal.hasher import MultimodalHasher
from fastdeploy.utils import data_processor_logger
Expand Down Expand Up @@ -531,7 +531,7 @@ def _load_and_process_video(self, url: str, item: Dict) -> Tuple[np.ndarray, Dic
- frames: Processed video frames as numpy array
- metadata: Updated video metadata dictionary
"""
reader, meta, _ = read_video_decord(url, save_to_disk=False)
reader, meta, _ = read_video_paddlecodec(url, save_to_disk=False)

# Apply frame sampling if fps or target_frames specified
fps = item.get("fps", self.fps)
Expand Down
81 changes: 60 additions & 21 deletions fastdeploy/input/video_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

"""Shared video utilities: VideoReaderWrapper, read_video_decord, and sample_frames."""
"""Shared video utilities: VideoReaderWrapper, read_video_paddlecodec, and sample_frames."""

import io
import math
Expand All @@ -21,13 +21,16 @@
from typing import Optional, Union

import numpy as np
import paddle

from fastdeploy.input.image_processors.common import ceil_by_factor, floor_by_factor
from fastdeploy.utils import data_processor_logger
from fastdeploy.utils import data_processor_logger, get_logger

logger = get_logger("video_utils")

__all__ = [
"VideoReaderWrapper",
"read_video_decord",
"read_video_paddlecodec",

This comment was marked as outdated.

"sample_frames",
"sample_frames_qwen",
"sample_frames_paddleocr",
Expand All @@ -44,15 +47,20 @@ def _is_gif(data: bytes) -> bool:
return data[:6] in (b"GIF87a", b"GIF89a")


class VideoReaderWrapper:
"""decord.VideoReader wrapper that fixes a memory leak and adds GIF support.
class _NumpyFrame:
"""Wrapper so that frame[idx].asnumpy() keeps working with paddlecodec."""

Reference: https://github.com/dmlc/decord/issues/208
"""
def __init__(self, array):
self._array = array

def asnumpy(self):
return self._array

def __init__(self, video_path, *args, **kwargs):
import decord

class VideoReaderWrapper:
"""paddlecodec VideoDecoder wrapper with GIF support."""

def __init__(self, video_path, *args, **kwargs):
try:
# moviepy 1.0
import moviepy.editor as mp
Expand Down Expand Up @@ -91,22 +99,53 @@ def __init__(self, video_path, *args, **kwargs):
video_path = mp4_path
self.original_file = video_path # temp mp4, cleaned up in __del__

self._reader = decord.VideoReader(video_path, *args, **kwargs)
self._reader.seek(0)
with paddle.use_compat_guard(enable=True, scope={"torchcodec"}):
try:
import sys

from torchcodec.decoders import VideoDecoder

sys.modules["torchcodec"] = None

This comment was marked as outdated.

except (ImportError, RuntimeError) as e:
logger.error(
f"Failed to load 'torchcodec' backend via Paddle proxy.\n"
f" - Common Causes:\n"
f" 1. Conflict with official 'torch' or 'torchcodec' packages.\n"
f" 2. Missing FFmpeg libraries or System library mismatch (CXXABI).\n"
f" - Recommended Fix Steps:\n"
f" 1. Install dependencies: `conda install ffmpeg -c conda-forge` or `apt-get update && apt-get install ffmpeg` \n"
f" 2. Uninstall conflicts: `pip uninstall torchcodec paddlecodec -y`\n"
f" 3. Reinstall packages: `pip install paddlecodec --force-reinstall`\n"
f" - If you encounter 'CXXABI' or 'libstdc++' errors, your system libraries might be outdated.\n"
f" Try prioritizing Conda libraries by running: `LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH python your_script.py`\n"
f" - Original Error: {e}"
)
raise
PADDLECODEC_NUM_THREADS = int(os.environ.get("PADDLECODEC_NUM_THREADS", 0))
self._decoder = VideoDecoder(
video_path,
seek_mode="exact",

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Bug requirements.txt 当前固定 paddlecodec==0.1.0,但这个版本通过 torchcodec.decoders.VideoDecoder 暴露的构造参数不包含 seek_mode,这里会在所有共享视频读取路径初始化时直接抛出 TypeError。

建议修复方式:要么去掉 seek_mode="exact" 并按 0.1.0 支持的参数构造 VideoDecoder,要么把 requirements*.txt 统一提升/固定到支持该参数的 paddlecodec 版本,并补一个真实构造 smoke test,避免 mock 吞掉非法参数。

num_ffmpeg_threads=PADDLECODEC_NUM_THREADS,
device=kwargs.get("device", "cpu"),
dimension_order="NHWC",
)

def __len__(self):
return len(self._reader)
return self._decoder.metadata.num_frames

def __getitem__(self, key):
frames = self._reader[key]
self._reader.seek(0)
return frames
if isinstance(key, (int, np.integer)):
frame = self._decoder.get_frames_at(indices=[int(key)]).data[0]
return _NumpyFrame(frame.numpy())
if isinstance(key, slice):
indices = list(range(*key.indices(len(self))))
else:
indices = list(key) if not isinstance(key, list) else key
frames = self._decoder.get_frames_at(indices=indices).data
return _NumpyFrame(frames.numpy())

def get_avg_fps(self):
return self._reader.get_avg_fps()

def seek(self, pos):
return self._reader.seek(pos)
return self._decoder.metadata.average_fps

def __del__(self):
original_file = getattr(self, "original_file", None)
Expand All @@ -118,11 +157,11 @@ def __del__(self):


# ---------------------------------------------------------------------------
# read_video_decord
# read_video_paddlecodec
# ---------------------------------------------------------------------------


def read_video_decord(video_path, save_to_disk: bool = False):
def read_video_paddlecodec(video_path, save_to_disk: bool = False):

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Bug 这个函数保留了旧的 bytes -> io.BytesIO 归一化,但新后端不再是 decord;默认固定的 paddlecodec 0.1 VideoDecoder 支持原始 bytes,不支持把非 GIF 视频作为 BytesIO 传入。所以上层传入视频字节流时会在 wrapper 内构造 decoder 失败。

建议修复方式:不要在这里把普通 bytes 转成 BytesIO;直接把原始 bytes 交给 VideoReaderWrapper,并在 wrapper 内只对 GIF bytes/BytesIO 做临时文件转码。非 GIF BytesIO 也应先读回原始 bytes 或落临时文件后再调用 VideoDecoder。

"""Load a video file and return (video_reader, video_meta, video_path).

video_meta contains keys: "fps", "duration", "num_of_frame".
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ paddleformers>=1.1.1
redis
etcd3
httpx
tool_helpers
fast_dataindex
cupy-cuda12x
pybind11[global]
tabulate
Expand All @@ -23,7 +23,7 @@ xlwt
visualdl
setuptools-scm>=8
prometheus-client
decord
paddlecodec==0.1.0
moviepy
triton
crcmod
Expand Down
Loading
Loading