mirror of
git://git.yoctoproject.org/meta-raspberrypi.git
synced 2025-07-05 05:04:45 +02:00

Upgrades version of ffmpeg to 4.3.4 * Reason for not upgrading to 4.3.5: all ported raspberrypi team patches may not be included in that version/commit. * SRCREV set to 246e1a55a0eca931537d8706acd8b133c07beb05 Updates to PACKAGECONFIG * Only include the --enable-opengl flag when opengl is set in DISTRO_FEATURES * Add new flag --enable-epoxy required by vout-egl * vout-egl requires both libepoxy and x11. Only enable vout-egl if x11 is contained in DISTRO_FEATURES. * The remaining RPI-Distro related flags added through patches are only enabled if vc4graphics is disabled and userland graphics is enabled, in an attempt to keep ffmpeg ./configure generic unless specified otherwise. Removes TARGET_CFLAGS:append as include flags are set in ./configure via the 2001-configure-setup-for-OE-core-usage.patch patch. Replaces patches with updated patches used in the actual commit. Adds four new patches to fix ./configure, compile, and runtime bugs. PATCHES: - 2001-configure-setup-for-OE-core-usage.patch * The ./configure stage fails if neither x11 nor wayland is defined in DISTRO_FEATURES. When opengl is enabled, ./configure checks for relevant headers. The last header it checks for is ES2/gl.h, which doesn't exist. Neither do the others if certain parameters are not met. The patch adds a check for GLES2/gl2.h, which does exist. We utilize GLESv2 to compile and link with. The patch also replaces where the compiler finds mmal and omx headers and libs. - 2002-libavdevice-opengl_enc-update-dynamic-function-loader.patch * After the configure stage succeeds, the compile stage fails as SelectedGetProcAddress isn't defined. It can't be defined if x11 isn't enabled. The patch defines SelectedGetProcAddress as SDL_GL_GetProcAddress if x11 is not enabled but sdl2 is enabled. If neither sdl2 nor x11 is enabled, the patch loads GL function pointers at compile time versus dynamically at runtime. 
- 2003-libavcodec-fix-v4l2_req_devscan.patch * v4l2_req_devscan.h function definitions were different from v4l2_req_devscan.c function implementations. - 2004-libavcodec-omx-replace-opt-vc-path-with-usr-lib.patch * Fixes where libbcm_host.so and libopenmaxil.so are loaded from. Signed-off-by: Vincent Davis Jr <vince@underview.tech>
293 lines
11 KiB
Diff
293 lines
11 KiB
Diff
From: James Cowgill <jcowgill@debian.org>
|
|
Date: Sun, 11 Aug 2019 16:50:56 +0100
|
|
Subject: avcodec/arm/sbcenc: avoid callee preserved vfp registers
|
|
|
|
Upstream-Status: Inappropriate
|
|
|
|
RPI-Distro repo clones original ffmpeg and applies patches to enable
|
|
Raspberry Pi support.
|
|
|
|
When compiling FFmpeg with GCC-9, some very random segfaults were
|
|
observed in code which had previously called down into the SBC encoder
|
|
NEON assembly routines. This was caused by these functions clobbering
|
|
some of the vfp callee saved registers (d8 - d15 aka q4 - q7). GCC was
|
|
using these registers to save local variables, but after these
|
|
functions returned, they would contain garbage.
|
|
|
|
Fix by reallocating the registers in the two affected functions in
|
|
the following way:
|
|
ff_sbc_analyze_4_neon: q2-q5 => q8-q11, then q1-q4 => q8-q11
|
|
ff_sbc_analyze_8_neon: q2-q9 => q8-q15
|
|
|
|
The reason for using these replacements is to keep closely related
|
|
sets of registers consecutively numbered which hopefully makes the
|
|
code easier to follow. Since this commit only reallocates
|
|
registers, it should have no performance impact.
|
|
|
|
Signed-off-by: James Cowgill <jcowgill@debian.org>
|
|
---
|
|
libavcodec/arm/sbcdsp_neon.S | 220 +++++++++++++++++++++----------------------
|
|
1 file changed, 110 insertions(+), 110 deletions(-)
|
|
|
|
diff --git a/libavcodec/arm/sbcdsp_neon.S b/libavcodec/arm/sbcdsp_neon.S
|
|
index d83d21d..914abfb 100644
|
|
--- a/libavcodec/arm/sbcdsp_neon.S
|
|
+++ b/libavcodec/arm/sbcdsp_neon.S
|
|
@@ -38,49 +38,49 @@ function ff_sbc_analyze_4_neon, export=1
|
|
/* TODO: merge even and odd cases (or even merge all four calls to this
|
|
* function) in order to have only aligned reads from 'in' array
|
|
* and reduce number of load instructions */
|
|
- vld1.16 {d4, d5}, [r0, :64]!
|
|
- vld1.16 {d8, d9}, [r2, :128]!
|
|
+ vld1.16 {d16, d17}, [r0, :64]!
|
|
+ vld1.16 {d20, d21}, [r2, :128]!
|
|
|
|
- vmull.s16 q0, d4, d8
|
|
- vld1.16 {d6, d7}, [r0, :64]!
|
|
- vmull.s16 q1, d5, d9
|
|
- vld1.16 {d10, d11}, [r2, :128]!
|
|
+ vmull.s16 q0, d16, d20
|
|
+ vld1.16 {d18, d19}, [r0, :64]!
|
|
+ vmull.s16 q1, d17, d21
|
|
+ vld1.16 {d22, d23}, [r2, :128]!
|
|
|
|
- vmlal.s16 q0, d6, d10
|
|
- vld1.16 {d4, d5}, [r0, :64]!
|
|
- vmlal.s16 q1, d7, d11
|
|
- vld1.16 {d8, d9}, [r2, :128]!
|
|
+ vmlal.s16 q0, d18, d22
|
|
+ vld1.16 {d16, d17}, [r0, :64]!
|
|
+ vmlal.s16 q1, d19, d23
|
|
+ vld1.16 {d20, d21}, [r2, :128]!
|
|
|
|
- vmlal.s16 q0, d4, d8
|
|
- vld1.16 {d6, d7}, [r0, :64]!
|
|
- vmlal.s16 q1, d5, d9
|
|
- vld1.16 {d10, d11}, [r2, :128]!
|
|
+ vmlal.s16 q0, d16, d20
|
|
+ vld1.16 {d18, d19}, [r0, :64]!
|
|
+ vmlal.s16 q1, d17, d21
|
|
+ vld1.16 {d22, d23}, [r2, :128]!
|
|
|
|
- vmlal.s16 q0, d6, d10
|
|
- vld1.16 {d4, d5}, [r0, :64]!
|
|
- vmlal.s16 q1, d7, d11
|
|
- vld1.16 {d8, d9}, [r2, :128]!
|
|
+ vmlal.s16 q0, d18, d22
|
|
+ vld1.16 {d16, d17}, [r0, :64]!
|
|
+ vmlal.s16 q1, d19, d23
|
|
+ vld1.16 {d20, d21}, [r2, :128]!
|
|
|
|
- vmlal.s16 q0, d4, d8
|
|
- vmlal.s16 q1, d5, d9
|
|
+ vmlal.s16 q0, d16, d20
|
|
+ vmlal.s16 q1, d17, d21
|
|
|
|
vpadd.s32 d0, d0, d1
|
|
vpadd.s32 d1, d2, d3
|
|
|
|
vrshrn.s32 d0, q0, SBC_PROTO_FIXED_SCALE
|
|
|
|
- vld1.16 {d2, d3, d4, d5}, [r2, :128]!
|
|
+ vld1.16 {d16, d17, d18, d19}, [r2, :128]!
|
|
|
|
vdup.i32 d1, d0[1] /* TODO: can be eliminated */
|
|
vdup.i32 d0, d0[0] /* TODO: can be eliminated */
|
|
|
|
- vmull.s16 q3, d2, d0
|
|
- vmull.s16 q4, d3, d0
|
|
- vmlal.s16 q3, d4, d1
|
|
- vmlal.s16 q4, d5, d1
|
|
+ vmull.s16 q10, d16, d0
|
|
+ vmull.s16 q11, d17, d0
|
|
+ vmlal.s16 q10, d18, d1
|
|
+ vmlal.s16 q11, d19, d1
|
|
|
|
- vpadd.s32 d0, d6, d7 /* TODO: can be eliminated */
|
|
- vpadd.s32 d1, d8, d9 /* TODO: can be eliminated */
|
|
+ vpadd.s32 d0, d20, d21 /* TODO: can be eliminated */
|
|
+ vpadd.s32 d1, d22, d23 /* TODO: can be eliminated */
|
|
|
|
vst1.32 {d0, d1}, [r1, :128]
|
|
|
|
@@ -91,57 +91,57 @@ function ff_sbc_analyze_8_neon, export=1
|
|
/* TODO: merge even and odd cases (or even merge all four calls to this
|
|
* function) in order to have only aligned reads from 'in' array
|
|
* and reduce number of load instructions */
|
|
- vld1.16 {d4, d5}, [r0, :64]!
|
|
- vld1.16 {d8, d9}, [r2, :128]!
|
|
-
|
|
- vmull.s16 q6, d4, d8
|
|
- vld1.16 {d6, d7}, [r0, :64]!
|
|
- vmull.s16 q7, d5, d9
|
|
- vld1.16 {d10, d11}, [r2, :128]!
|
|
- vmull.s16 q8, d6, d10
|
|
- vld1.16 {d4, d5}, [r0, :64]!
|
|
- vmull.s16 q9, d7, d11
|
|
- vld1.16 {d8, d9}, [r2, :128]!
|
|
-
|
|
- vmlal.s16 q6, d4, d8
|
|
- vld1.16 {d6, d7}, [r0, :64]!
|
|
- vmlal.s16 q7, d5, d9
|
|
- vld1.16 {d10, d11}, [r2, :128]!
|
|
- vmlal.s16 q8, d6, d10
|
|
- vld1.16 {d4, d5}, [r0, :64]!
|
|
- vmlal.s16 q9, d7, d11
|
|
- vld1.16 {d8, d9}, [r2, :128]!
|
|
-
|
|
- vmlal.s16 q6, d4, d8
|
|
- vld1.16 {d6, d7}, [r0, :64]!
|
|
- vmlal.s16 q7, d5, d9
|
|
- vld1.16 {d10, d11}, [r2, :128]!
|
|
- vmlal.s16 q8, d6, d10
|
|
- vld1.16 {d4, d5}, [r0, :64]!
|
|
- vmlal.s16 q9, d7, d11
|
|
- vld1.16 {d8, d9}, [r2, :128]!
|
|
-
|
|
- vmlal.s16 q6, d4, d8
|
|
- vld1.16 {d6, d7}, [r0, :64]!
|
|
- vmlal.s16 q7, d5, d9
|
|
- vld1.16 {d10, d11}, [r2, :128]!
|
|
- vmlal.s16 q8, d6, d10
|
|
- vld1.16 {d4, d5}, [r0, :64]!
|
|
- vmlal.s16 q9, d7, d11
|
|
- vld1.16 {d8, d9}, [r2, :128]!
|
|
-
|
|
- vmlal.s16 q6, d4, d8
|
|
- vld1.16 {d6, d7}, [r0, :64]!
|
|
- vmlal.s16 q7, d5, d9
|
|
- vld1.16 {d10, d11}, [r2, :128]!
|
|
-
|
|
- vmlal.s16 q8, d6, d10
|
|
- vmlal.s16 q9, d7, d11
|
|
-
|
|
- vpadd.s32 d0, d12, d13
|
|
- vpadd.s32 d1, d14, d15
|
|
- vpadd.s32 d2, d16, d17
|
|
- vpadd.s32 d3, d18, d19
|
|
+ vld1.16 {d16, d17}, [r0, :64]!
|
|
+ vld1.16 {d20, d21}, [r2, :128]!
|
|
+
|
|
+ vmull.s16 q12, d16, d20
|
|
+ vld1.16 {d18, d19}, [r0, :64]!
|
|
+ vmull.s16 q13, d17, d21
|
|
+ vld1.16 {d22, d23}, [r2, :128]!
|
|
+ vmull.s16 q14, d18, d22
|
|
+ vld1.16 {d16, d17}, [r0, :64]!
|
|
+ vmull.s16 q15, d19, d23
|
|
+ vld1.16 {d20, d21}, [r2, :128]!
|
|
+
|
|
+ vmlal.s16 q12, d16, d20
|
|
+ vld1.16 {d18, d19}, [r0, :64]!
|
|
+ vmlal.s16 q13, d17, d21
|
|
+ vld1.16 {d22, d23}, [r2, :128]!
|
|
+ vmlal.s16 q14, d18, d22
|
|
+ vld1.16 {d16, d17}, [r0, :64]!
|
|
+ vmlal.s16 q15, d19, d23
|
|
+ vld1.16 {d20, d21}, [r2, :128]!
|
|
+
|
|
+ vmlal.s16 q12, d16, d20
|
|
+ vld1.16 {d18, d19}, [r0, :64]!
|
|
+ vmlal.s16 q13, d17, d21
|
|
+ vld1.16 {d22, d23}, [r2, :128]!
|
|
+ vmlal.s16 q14, d18, d22
|
|
+ vld1.16 {d16, d17}, [r0, :64]!
|
|
+ vmlal.s16 q15, d19, d23
|
|
+ vld1.16 {d20, d21}, [r2, :128]!
|
|
+
|
|
+ vmlal.s16 q12, d16, d20
|
|
+ vld1.16 {d18, d19}, [r0, :64]!
|
|
+ vmlal.s16 q13, d17, d21
|
|
+ vld1.16 {d22, d23}, [r2, :128]!
|
|
+ vmlal.s16 q14, d18, d22
|
|
+ vld1.16 {d16, d17}, [r0, :64]!
|
|
+ vmlal.s16 q15, d19, d23
|
|
+ vld1.16 {d20, d21}, [r2, :128]!
|
|
+
|
|
+ vmlal.s16 q12, d16, d20
|
|
+ vld1.16 {d18, d19}, [r0, :64]!
|
|
+ vmlal.s16 q13, d17, d21
|
|
+ vld1.16 {d22, d23}, [r2, :128]!
|
|
+
|
|
+ vmlal.s16 q14, d18, d22
|
|
+ vmlal.s16 q15, d19, d23
|
|
+
|
|
+ vpadd.s32 d0, d24, d25
|
|
+ vpadd.s32 d1, d26, d27
|
|
+ vpadd.s32 d2, d28, d29
|
|
+ vpadd.s32 d3, d30, d31
|
|
|
|
vrshr.s32 q0, q0, SBC_PROTO_FIXED_SCALE
|
|
vrshr.s32 q1, q1, SBC_PROTO_FIXED_SCALE
|
|
@@ -153,38 +153,38 @@ function ff_sbc_analyze_8_neon, export=1
|
|
vdup.i32 d1, d0[1] /* TODO: can be eliminated */
|
|
vdup.i32 d0, d0[0] /* TODO: can be eliminated */
|
|
|
|
- vld1.16 {d4, d5}, [r2, :128]!
|
|
- vmull.s16 q6, d4, d0
|
|
- vld1.16 {d6, d7}, [r2, :128]!
|
|
- vmull.s16 q7, d5, d0
|
|
- vmull.s16 q8, d6, d0
|
|
- vmull.s16 q9, d7, d0
|
|
-
|
|
- vld1.16 {d4, d5}, [r2, :128]!
|
|
- vmlal.s16 q6, d4, d1
|
|
- vld1.16 {d6, d7}, [r2, :128]!
|
|
- vmlal.s16 q7, d5, d1
|
|
- vmlal.s16 q8, d6, d1
|
|
- vmlal.s16 q9, d7, d1
|
|
-
|
|
- vld1.16 {d4, d5}, [r2, :128]!
|
|
- vmlal.s16 q6, d4, d2
|
|
- vld1.16 {d6, d7}, [r2, :128]!
|
|
- vmlal.s16 q7, d5, d2
|
|
- vmlal.s16 q8, d6, d2
|
|
- vmlal.s16 q9, d7, d2
|
|
-
|
|
- vld1.16 {d4, d5}, [r2, :128]!
|
|
- vmlal.s16 q6, d4, d3
|
|
- vld1.16 {d6, d7}, [r2, :128]!
|
|
- vmlal.s16 q7, d5, d3
|
|
- vmlal.s16 q8, d6, d3
|
|
- vmlal.s16 q9, d7, d3
|
|
-
|
|
- vpadd.s32 d0, d12, d13 /* TODO: can be eliminated */
|
|
- vpadd.s32 d1, d14, d15 /* TODO: can be eliminated */
|
|
- vpadd.s32 d2, d16, d17 /* TODO: can be eliminated */
|
|
- vpadd.s32 d3, d18, d19 /* TODO: can be eliminated */
|
|
+ vld1.16 {d16, d17}, [r2, :128]!
|
|
+ vmull.s16 q12, d16, d0
|
|
+ vld1.16 {d18, d19}, [r2, :128]!
|
|
+ vmull.s16 q13, d17, d0
|
|
+ vmull.s16 q14, d18, d0
|
|
+ vmull.s16 q15, d19, d0
|
|
+
|
|
+ vld1.16 {d16, d17}, [r2, :128]!
|
|
+ vmlal.s16 q12, d16, d1
|
|
+ vld1.16 {d18, d19}, [r2, :128]!
|
|
+ vmlal.s16 q13, d17, d1
|
|
+ vmlal.s16 q14, d18, d1
|
|
+ vmlal.s16 q15, d19, d1
|
|
+
|
|
+ vld1.16 {d16, d17}, [r2, :128]!
|
|
+ vmlal.s16 q12, d16, d2
|
|
+ vld1.16 {d18, d19}, [r2, :128]!
|
|
+ vmlal.s16 q13, d17, d2
|
|
+ vmlal.s16 q14, d18, d2
|
|
+ vmlal.s16 q15, d19, d2
|
|
+
|
|
+ vld1.16 {d16, d17}, [r2, :128]!
|
|
+ vmlal.s16 q12, d16, d3
|
|
+ vld1.16 {d18, d19}, [r2, :128]!
|
|
+ vmlal.s16 q13, d17, d3
|
|
+ vmlal.s16 q14, d18, d3
|
|
+ vmlal.s16 q15, d19, d3
|
|
+
|
|
+ vpadd.s32 d0, d24, d25 /* TODO: can be eliminated */
|
|
+ vpadd.s32 d1, d26, d27 /* TODO: can be eliminated */
|
|
+ vpadd.s32 d2, d28, d29 /* TODO: can be eliminated */
|
|
+ vpadd.s32 d3, d30, d31 /* TODO: can be eliminated */
|
|
|
|
vst1.32 {d0, d1, d2, d3}, [r1, :128]
|
|
|