28 files changed, 1916 insertions, 191 deletions
diff --git a/media/base/cpu_features.h b/media/base/cpu_features.h
index c2762d8..0878385 100644
--- a/media/base/cpu_features.h
+++ b/media/base/cpu_features.h
@@ -10,6 +10,12 @@
 
 namespace media {
 
+// Returns true if CPU has MMX support.
+bool hasMMX();
+
+// Returns true if CPU has SSE support.
+bool hasSSE();
+
 // Returns true if CPU has SSE2 support.
 bool hasSSE2();
 
diff --git a/media/base/cpu_features_x86.cc b/media/base/cpu_features_x86.cc
index bf7d05d..4fb9304 100644
--- a/media/base/cpu_features_x86.cc
+++ b/media/base/cpu_features_x86.cc
@@ -48,6 +48,16 @@ static inline void getcpuid(int info_type, int info[4]) {
 }
 #endif
 
+bool hasMMX() {
+  // TODO(hclam): Actually check for MMX; for now support is assumed.
+  return true;
+}
+
+bool hasSSE() {
+  // TODO(hclam): Actually check for SSE; for now support is assumed.
+  return true;
+}
+
 bool hasSSE2() {
 #if defined(ARCH_CPU_X86_64)
   /* All x86_64 machines have SSE2, so don't even bother checking. */
diff --git a/media/base/simd/convert_rgb_to_yuv_x86.cc b/media/base/simd/convert_rgb_to_yuv_x86.cc
new file mode 100644
index 0000000..2bd6930
--- /dev/null
+++ b/media/base/simd/convert_rgb_to_yuv_x86.cc
@@ -0,0 +1,101 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "media/base/simd/convert_rgb_to_yuv.h"
+
+#include "build/build_config.h"
+#include "media/base/cpu_features.h"
+#include "media/base/simd/convert_rgb_to_yuv_ssse3.h"
+
+namespace media {
+
+void ConvertRGB32ToYUV_SSSE3(const uint8* rgbframe,
+                             uint8* yplane,
+                             uint8* uplane,
+                             uint8* vplane,
+                             int width,
+                             int height,
+                             int rgbstride,
+                             int ystride,
+                             int uvstride) {
+#ifdef ENABLE_SUBSAMPLING
+  for (; height >= 2; height -= 2) {
+    ConvertARGBToYUVEven_SSSE3(rgbframe, yplane, uplane, vplane, width);
+    rgbframe += rgbstride;
+    yplane += ystride;
+    // Second row of the pair goes through the odd-row routine.
+    ConvertARGBToYUVOdd_SSSE3(rgbframe, yplane, uplane, vplane, width);
+    rgbframe += rgbstride;
+    yplane += ystride;
+    // U and V advance one row for every two source rows.
+    uplane += uvstride;
+    vplane += uvstride;
+  }
+  // A leftover last row (odd height) is converted with the even-row routine.
+  if (height)
+    ConvertARGBToYUVEven_SSSE3(rgbframe, yplane, uplane, vplane, width);
+#else
+  for (; height >= 2; height -= 2) {
+    ConvertARGBToYUVRow_SSSE3(rgbframe, yplane, uplane, vplane, width);
+    rgbframe += rgbstride;
+    yplane += ystride;
+    // Second row of the pair: NULL is passed for both chroma planes.
+    ConvertARGBToYUVRow_SSSE3(rgbframe, yplane, NULL, NULL, width);
+    rgbframe += rgbstride;
+    yplane += ystride;
+    // U and V advance one row for every two source rows.
+    uplane += uvstride;
+    vplane += uvstride;
+  }
+  // A leftover last row (odd height) is converted with chroma output.
+  if (height)
+    ConvertARGBToYUVRow_SSSE3(rgbframe, yplane, uplane, vplane, width);
+#endif
+}
+
+void ConvertRGB24ToYUV_SSSE3(const uint8* rgbframe,
+                             uint8* yplane,
+                             uint8* uplane,
+                             uint8* vplane,
+                             int width,
+                             int height,
+                             int rgbstride,
+                             int ystride,
+                             int uvstride) {
+#ifdef ENABLE_SUBSAMPLING
+  for (; height >= 2; height -= 2) {
+    ConvertRGBToYUVEven_SSSE3(rgbframe, yplane, uplane, vplane, width);
+    rgbframe += rgbstride;
+    yplane += ystride;
+    // Second row of the pair goes through the odd-row routine.
+    ConvertRGBToYUVOdd_SSSE3(rgbframe, yplane, uplane, vplane, width);
+    rgbframe += rgbstride;
+    yplane += ystride;
+    // U and V advance one row for every two source rows.
+    uplane += uvstride;
+    vplane += uvstride;
+  }
+  // A leftover last row (odd height) is converted with the even-row routine.
+  if (height)
+    ConvertRGBToYUVEven_SSSE3(rgbframe, yplane, uplane, vplane, width);
+#else
+  for (; height >= 2; height -= 2) {
+    ConvertRGBToYUVRow_SSSE3(rgbframe, yplane, uplane, vplane, width);
+    rgbframe += rgbstride;
+    yplane += ystride;
+    // Second row of the pair: NULL is passed for both chroma planes.
+    ConvertRGBToYUVRow_SSSE3(rgbframe, yplane, NULL, NULL, width);
+    rgbframe += rgbstride;
+    yplane += ystride;
+    // U and V advance one row for every two source rows.
+    uplane += uvstride;
+    vplane += uvstride;
+  }
+  // A leftover last row (odd height) is converted with chroma output.
+  if (height)
+    ConvertRGBToYUVRow_SSSE3(rgbframe, yplane, uplane, vplane, width);
+#endif
+}
+
+}  // namespace media
diff --git a/media/base/simd/convert_yuv_to_rgb.h b/media/base/simd/convert_yuv_to_rgb.h
new file mode 100644
index 0000000..5f3df2c6
--- /dev/null
+++ b/media/base/simd/convert_yuv_to_rgb.h
@@ -0,0 +1,150 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef MEDIA_BASE_SIMD_CONVERT_YUV_TO_RGB_H_
+#define MEDIA_BASE_SIMD_CONVERT_YUV_TO_RGB_H_
+
+#include "base/basictypes.h"
+#include "media/base/yuv_convert.h"
+
+namespace media {
+
+typedef void (*ConvertYUVToRGB32Proc)(const uint8*,
+                                      const uint8*,
+                                      const uint8*,
+                                      uint8*,
+                                      int,
+                                      int,
+                                      int,
+                                      int,
+                                      int,
+                                      YUVType);
+
+void ConvertYUVToRGB32_C(const uint8* yplane,
+                         const uint8* uplane,
+                         const uint8* vplane,
+                         uint8* rgbframe,
+                         int width,
+                         int height,
+                         int ystride,
+                         int uvstride,
+                         int rgbstride,
+                         YUVType yuv_type);
+
+void ConvertYUVToRGB32_SSE(const uint8* yplane,
+                           const uint8* uplane,
+                           const uint8* vplane,
+                           uint8* rgbframe,
+                           int width,
+                           int height,
+                           int ystride,
+                           int uvstride,
+                           int rgbstride,
+                           YUVType yuv_type);
+
+void ConvertYUVToRGB32_MMX(const uint8* yplane,
+                           const uint8* uplane,
+                           const uint8* vplane,
+                           uint8* rgbframe,
+                           int width,
+                           int height,
+                           int ystride,
+                           int uvstride,
+                           int rgbstride,
+                           YUVType yuv_type);
+
+}  // namespace media
+
+// Assembly functions are declared without namespace.
+extern "C" {
+
+typedef void (*ConvertYUVToRGB32RowProc)(const uint8*,
+                                          const uint8*,
+                                          const uint8*,
+                                          uint8*,
+                                          int);
+typedef void (*ScaleYUVToRGB32RowProc)(const uint8*,
+                                       const uint8*,
+                                       const uint8*,
+                                       uint8*,
+                                       int,
+                                       int);
+
+void ConvertYUVToRGB32Row_C(const uint8* yplane,
+                            const uint8* uplane,
+                            const uint8* vplane,
+                            uint8* rgbframe,
+                            int width);
+
+void ConvertYUVToRGB32Row_MMX(const uint8* yplane,
+                              const uint8* uplane,
+                              const uint8* vplane,
+                              uint8* rgbframe,
+                              int width);
+
+void ConvertYUVToRGB32Row_SSE(const uint8* yplane,
+                              const uint8* uplane,
+                              const uint8* vplane,
+                              uint8* rgbframe,
+                              int width);
+
+void ScaleYUVToRGB32Row_C(const uint8* y_buf,
+                          const uint8* u_buf,
+                          const uint8* v_buf,
+                          uint8* rgb_buf,
+                          int width,
+                          int source_dx);
+
+void ScaleYUVToRGB32Row_MMX(const uint8* y_buf,
+                            const uint8* u_buf,
+                            const uint8* v_buf,
+                            uint8* rgb_buf,
+                            int width,
+                            int source_dx);
+
+void ScaleYUVToRGB32Row_SSE(const uint8* y_buf,
+                            const uint8* u_buf,
+                            const uint8* v_buf,
+                            uint8* rgb_buf,
+                            int width,
+                            int source_dx);
+
+void ScaleYUVToRGB32Row_SSE2_X64(const uint8* y_buf,
+                                 const uint8* u_buf,
+                                 const uint8* v_buf,
+                                 uint8* rgb_buf,
+                                 int width,
+                                 int source_dx);
+
+void LinearScaleYUVToRGB32Row_C(const uint8* y_buf,
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* rgb_buf,
+                                int width,
+                                int source_dx);
+
+void LinearScaleYUVToRGB32Row_MMX(const uint8* y_buf,
+                                  const uint8* u_buf,
+                                  const uint8* v_buf,
+                                  uint8* rgb_buf,
+                                  int width,
+                                  int source_dx);
+
+void LinearScaleYUVToRGB32Row_SSE(const uint8* y_buf,
+                                  const uint8* u_buf,
+                                  const uint8* v_buf,
+                                  uint8* rgb_buf,
+                                  int width,
+                                  int source_dx);
+
+void LinearScaleYUVToRGB32Row_MMX_X64(const uint8* y_buf,
+                                      const uint8* u_buf,
+                                      const uint8* v_buf,
+                                      uint8* rgb_buf,
+                                      int width,
+                                      int source_dx);
+
+}
+
+#endif  // MEDIA_BASE_SIMD_CONVERT_YUV_TO_RGB_H_
diff --git a/media/base/simd/convert_yuv_to_rgb_c.cc b/media/base/simd/convert_yuv_to_rgb_c.cc
new file mode 100644
index 0000000..f8e70b2
--- /dev/null
+++ b/media/base/simd/convert_yuv_to_rgb_c.cc
@@ -0,0 +1,155 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "media/base/simd/convert_yuv_to_rgb.h"
+// TODO(hclam): Shouldn't depend on yuv_row.h.
+#include "media/base/yuv_row.h"
+
+#define packuswb(x) ((x) < 0 ? 0 : ((x) > 255 ? 255 : (x)))
+#define paddsw(x, y) (((x) + (y)) < -32768 ? -32768 : \
+    (((x) + (y)) > 32767 ? 32767 : ((x) + (y))))
+
+static inline void YUVPixel(uint8 y,
+                            uint8 u,
+                            uint8 v,
+                            uint8* rgb_buf) {
+  // Start from the U term (rows 256..511 of kCoefficientsRgbY).
+  int b = kCoefficientsRgbY[256+u][0];
+  int g = kCoefficientsRgbY[256+u][1];
+  int r = kCoefficientsRgbY[256+u][2];
+  int a = kCoefficientsRgbY[256+u][3];
+  // Add the V term (rows 512..767), saturating to the int16 range.
+  b = paddsw(b, kCoefficientsRgbY[512+v][0]);
+  g = paddsw(g, kCoefficientsRgbY[512+v][1]);
+  r = paddsw(r, kCoefficientsRgbY[512+v][2]);
+  a = paddsw(a, kCoefficientsRgbY[512+v][3]);
+  // Add the Y term (rows 0..255), saturating the same way.
+  b = paddsw(b, kCoefficientsRgbY[y][0]);
+  g = paddsw(g, kCoefficientsRgbY[y][1]);
+  r = paddsw(r, kCoefficientsRgbY[y][2]);
+  a = paddsw(a, kCoefficientsRgbY[y][3]);
+  // Shift right by 6; presumably the table keeps 6 fractional bits.
+  b >>= 6;
+  g >>= 6;
+  r >>= 6;
+  a >>= 6;
+  // Clamp every channel to 0..255 and store B, G, R, A as one 32-bit word.
+  *reinterpret_cast<uint32*>(rgb_buf) = (packuswb(b)) |
+                                        (packuswb(g) << 8) |
+                                        (packuswb(r) << 16) |
+                                        (packuswb(a) << 24);
+}
+
+extern "C" {
+
+void ConvertYUVToRGB32Row_C(const uint8* y_buf,
+                            const uint8* u_buf,
+                            const uint8* v_buf,
+                            uint8* rgb_buf,
+                            int width) {
+  for (int x = 0; x < width; x += 2) {
+    uint8 u = u_buf[x >> 1];
+    uint8 v = v_buf[x >> 1];
+    uint8 y0 = y_buf[x];
+    YUVPixel(y0, u, v, rgb_buf);
+    if ((x + 1) < width) {
+      uint8 y1 = y_buf[x + 1];
+      YUVPixel(y1, u, v, rgb_buf + 4);
+    }
+    rgb_buf += 8;  // Advance 2 pixels.
+  }
+}
+
+// 16.16 fixed point is used.  A shift by 16 isolates the integer.
+// A shift by 17 is used to further subsample the chrominance channels.
+// & 0xffff isolates the fixed point fraction.  >> 2 to get the upper 2 bits,
+// for 1/65536 pixel accurate interpolation.
+void ScaleYUVToRGB32Row_C(const uint8* y_buf,
+                          const uint8* u_buf,
+                          const uint8* v_buf,
+                          uint8* rgb_buf,
+                          int width,
+                          int source_dx) {
+  int x = 0;
+  for (int i = 0; i < width; i += 2) {
+    int y = y_buf[x >> 16];
+    int u = u_buf[(x >> 17)];
+    int v = v_buf[(x >> 17)];
+    YUVPixel(y, u, v, rgb_buf);
+    x += source_dx;
+    if ((i + 1) < width) {
+      y = y_buf[x >> 16];
+      YUVPixel(y, u, v, rgb_buf+4);
+      x += source_dx;
+    }
+    rgb_buf += 8;
+  }
+}
+
+void LinearScaleYUVToRGB32Row_C(const uint8* y_buf,
+                                const uint8* u_buf,
+                                const uint8* v_buf,
+                                uint8* rgb_buf,
+                                int width,
+                                int source_dx) {
+  int x = 0;
+  if (source_dx >= 0x20000) {
+    x = 32768;
+  }
+  for (int i = 0; i < width; i += 2) {
+    int y0 = y_buf[x >> 16];
+    int y1 = y_buf[(x >> 16) + 1];
+    int u0 = u_buf[(x >> 17)];
+    int u1 = u_buf[(x >> 17) + 1];
+    int v0 = v_buf[(x >> 17)];
+    int v1 = v_buf[(x >> 17) + 1];
+    int y_frac = (x & 65535);
+    int uv_frac = ((x >> 1) & 65535);
+    int y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;
+    int u = (uv_frac * u1 + (uv_frac ^ 65535) * u0) >> 16;
+    int v = (uv_frac * v1 + (uv_frac ^ 65535) * v0) >> 16;
+    YUVPixel(y, u, v, rgb_buf);
+    x += source_dx;
+    if ((i + 1) < width) {
+      y0 = y_buf[x >> 16];
+      y1 = y_buf[(x >> 16) + 1];
+      y_frac = (x & 65535);
+      y = (y_frac * y1 + (y_frac ^ 65535) * y0) >> 16;
+      YUVPixel(y, u, v, rgb_buf+4);
+      x += source_dx;
+    }
+    rgb_buf += 8;
+  }
+}
+
+}
+
+namespace media {
+
+void ConvertYUVToRGB32_C(const uint8* yplane,
+                         const uint8* uplane,
+                         const uint8* vplane,
+                         uint8* rgbframe,
+                         int width,
+                         int height,
+                         int ystride,
+                         int uvstride,
+                         int rgbstride,
+                         YUVType yuv_type) {
+  unsigned int y_shift = yuv_type;
+  for (int y = 0; y < height; ++y) {
+    uint8* rgb_row = rgbframe + y * rgbstride;
+    const uint8* y_ptr = yplane + y * ystride;
+    const uint8* u_ptr = uplane + (y >> y_shift) * uvstride;
+    const uint8* v_ptr = vplane + (y >> y_shift) * uvstride;
+    // Rows with the same (y >> y_shift) reuse the same U and V row.
+    ConvertYUVToRGB32Row_C(y_ptr,
+                           u_ptr,
+                           v_ptr,
+                           rgb_row,
+                           width);
+  }
+}
+
+}  // namespace media
diff --git a/media/base/simd/convert_yuv_to_rgb_mmx.asm b/media/base/simd/convert_yuv_to_rgb_mmx.asm
new file mode 100644
index 0000000..e044474
--- /dev/null
+++ b/media/base/simd/convert_yuv_to_rgb_mmx.asm
@@ -0,0 +1,22 @@
+; Copyright (c) 2011 The Chromium Authors. All rights reserved.
+; Use of this source code is governed by a BSD-style license that can be
+; found in the LICENSE file.
+
+%include "x86inc.asm"
+
+;
+; This file uses MMX instructions.
+;
+  SECTION_TEXT
+  CPU       MMX
+
+; Use movq to save the output.
+%define MOVQ movq
+
+; extern "C" void ConvertYUVToRGB32Row_MMX(const uint8* y_buf,
+;                                          const uint8* u_buf,
+;                                          const uint8* v_buf,
+;                                          uint8* rgb_buf,
+;                                          int width);
+%define SYMBOL ConvertYUVToRGB32Row_MMX
+%include "convert_yuv_to_rgb_mmx.inc"
diff --git a/media/base/simd/convert_yuv_to_rgb_mmx.inc b/media/base/simd/convert_yuv_to_rgb_mmx.inc
new file mode 100644
index 0000000..b9555ce
--- /dev/null
+++ b/media/base/simd/convert_yuv_to_rgb_mmx.inc
@@ -0,0 +1,119 @@
+; Copyright (c) 2011 The Chromium Authors. All rights reserved.
+; Use of this source code is governed by a BSD-style license that can be
+; found in the LICENSE file.
+
+  global    mangle(SYMBOL) PRIVATE
+  align     function_align
+
+; Non-PIC code is the fastest so use this if possible.
+%ifndef PIC
+mangle(SYMBOL):
+  %assign   stack_offset 0
+  PROLOGUE  5, 7, 3, Y, U, V, ARGB, WIDTH, TEMPU, TEMPV
+  extern    mangle(kCoefficientsRgbY)
+  jmp       .convertend
+  ; Each pass converts two pixels that share one U and one V sample.
+.convertloop:
+  movzx     TEMPUd, BYTE [Uq]
+  add       Uq, 1
+  movzx     TEMPVd, BYTE [Vq]
+  add       Vq, 1
+  movq      mm0, [mangle(kCoefficientsRgbY) + 2048 + 8 * TEMPUq]
+  movzx     TEMPUd, BYTE [Yq]
+  paddsw    mm0, [mangle(kCoefficientsRgbY) + 4096 + 8 * TEMPVq]
+  movzx     TEMPVd, BYTE [Yq + 1]
+  movq      mm1, [mangle(kCoefficientsRgbY) + 8 * TEMPUq]
+  add       Yq, 2
+  movq      mm2, [mangle(kCoefficientsRgbY) + 8 * TEMPVq]
+  paddsw    mm1, mm0
+  paddsw    mm2, mm0
+  psraw     mm1, 6
+  psraw     mm2, 6
+  packuswb  mm1, mm2
+  MOVQ      [ARGBq], mm1
+  add       ARGBq, 8
+  ; Keep looping while WIDTH - 2 is not negative.
+.convertend:
+  sub       WIDTHq, 2
+  jns       .convertloop
+
+  ; If the number of pixels is odd, convert the last one.
+  and       WIDTHq, 1
+  jz        .convertdone
+  ; The final pixel is stored with a 4-byte movd.
+  movzx     TEMPUd, BYTE [Uq]
+  movq      mm0, [mangle(kCoefficientsRgbY) + 2048 + 8 * TEMPUq]
+  movzx     TEMPVd, BYTE [Vq]
+  paddsw    mm0, [mangle(kCoefficientsRgbY) + 4096 + 8 * TEMPVq]
+  movzx     TEMPUd, BYTE [Yq]
+  movq      mm1, [mangle(kCoefficientsRgbY) + 8 * TEMPUq]
+  paddsw    mm1, mm0
+  psraw     mm1, 6
+  packuswb  mm1, mm1
+  movd      [ARGBq], mm1
+
+.convertdone:
+  RET
+%endif
+
+; With PIC code we need to load the address of mangle(kCoefficientsRgbY).
+; This code is slower than the above version.
+%ifdef PIC
+mangle(SYMBOL):
+  %assign   stack_offset 0
+  PROLOGUE  5, 7, 3, Y, U, V, ARGB, WIDTH, TEMP, TABLE
+
+  extern    mangle(kCoefficientsRgbY)
+  LOAD_SYM  TABLEq, mangle(kCoefficientsRgbY)
+
+  jmp       .convertend
+
+.convertloop:
+  movzx     TEMPd, BYTE [Uq]
+  movq      mm0, [TABLEq + 2048 + 8 * TEMPq]
+  add       Uq, 1
+
+  movzx     TEMPd, BYTE [Vq]
+  paddsw    mm0, [TABLEq + 4096 + 8 * TEMPq]
+  add       Vq, 1
+
+  movzx     TEMPd, BYTE [Yq]
+  movq      mm1, [TABLEq + 8 * TEMPq]
+
+  movzx     TEMPd, BYTE [Yq + 1]
+  movq      mm2, [TABLEq + 8 * TEMPq]
+  add       Yq, 2
+
+  ; Add UV components to Y component.
+  paddsw    mm1, mm0
+  paddsw    mm2, mm0
+
+  ; Down shift and then pack.
+  psraw     mm1, 6
+  psraw     mm2, 6
+  packuswb  mm1, mm2
+  MOVQ      [ARGBq], mm1
+  add       ARGBq, 8
+
+.convertend:
+  sub       WIDTHq, 2
+  jns       .convertloop
+
+  ; If number of pixels is odd then compute it.
+  and       WIDTHq, 1
+  jz        .convertdone
+
+  movzx     TEMPd, BYTE [Uq]
+  movq      mm0, [TABLEq + 2048 + 8 * TEMPq]
+  movzx     TEMPd, BYTE [Vq]
+  paddsw    mm0, [TABLEq + 4096 + 8 * TEMPq]
+  movzx     TEMPd, BYTE [Yq]
+  movq      mm1, [TABLEq + 8 * TEMPq]
+  paddsw    mm1, mm0
+  psraw     mm1, 6
+  packuswb  mm1, mm1
+  movd      [ARGBq], mm1
+
+.convertdone:
+  RET
+%endif
diff --git a/media/base/simd/convert_yuv_to_rgb_sse.asm b/media/base/simd/convert_yuv_to_rgb_sse.asm
new file mode 100644
index 0000000..28d2214
--- /dev/null
+++ b/media/base/simd/convert_yuv_to_rgb_sse.asm
@@ -0,0 +1,40 @@
+; Copyright (c) 2011 The Chromium Authors. All rights reserved.
+; Use of this source code is governed by a BSD-style license that can be
+; found in the LICENSE file.
+
+%include "x86inc.asm"
+
+;
+; This file uses MMX and SSE instructions.
+;
+  SECTION_TEXT
+  CPU       MMX, SSE
+
+; Use the SSE instruction movntq (non-temporal store) for faster writes.
+%define MOVQ movntq
+
+;
+; extern "C" void ConvertYUVToRGB32Row_SSE(const uint8* y_buf,
+;                                          const uint8* u_buf,
+;                                          const uint8* v_buf,
+;                                          uint8* rgb_buf,
+;                                          int width);
+%define SYMBOL ConvertYUVToRGB32Row_SSE
+%include "convert_yuv_to_rgb_mmx.inc"
+
+; void ScaleYUVToRGB32Row_SSE(const uint8* y_buf,
+;                             const uint8* u_buf,
+;                             const uint8* v_buf,
+;                             uint8* rgb_buf,
+;                             int width,
+;                             int source_dx);
+%define SYMBOL ScaleYUVToRGB32Row_SSE
+%include "scale_yuv_to_rgb_mmx.inc"
+
+; void LinearScaleYUVToRGB32Row_MMX(const uint8* y_buf,
+;                                   const uint8* u_buf,
+;                                   const uint8* v_buf,
+;                                   uint8* rgb_buf,
+;                                   int width,
+;                                   int source_dx);
+
diff --git a/media/base/simd/convert_yuv_to_rgb_x86.cc b/media/base/simd/convert_yuv_to_rgb_x86.cc
new file mode 100644
index 0000000..3e03ef9
--- /dev/null
+++ b/media/base/simd/convert_yuv_to_rgb_x86.cc
@@ -0,0 +1,71 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#else
+#include <mmintrin.h>
+#endif
+
+#include "media/base/cpu_features.h"
+#include "media/base/simd/convert_yuv_to_rgb.h"
+#include "media/base/yuv_convert.h"
+
+namespace media {
+
+void ConvertYUVToRGB32_MMX(const uint8* yplane,
+                           const uint8* uplane,
+                           const uint8* vplane,
+                           uint8* rgbframe,
+                           int width,
+                           int height,
+                           int ystride,
+                           int uvstride,
+                           int rgbstride,
+                           YUVType yuv_type) {
+  unsigned int y_shift = yuv_type;
+  for (int y = 0; y < height; ++y) {
+    uint8* rgb_row = rgbframe + y * rgbstride;
+    const uint8* y_ptr = yplane + y * ystride;
+    const uint8* u_ptr = uplane + (y >> y_shift) * uvstride;
+    const uint8* v_ptr = vplane + (y >> y_shift) * uvstride;
+    // Rows with the same (y >> y_shift) reuse the same U and V row.
+    ConvertYUVToRGB32Row_MMX(y_ptr,
+                             u_ptr,
+                             v_ptr,
+                             rgb_row,
+                             width);
+  }
+  // The row routine uses MMX registers; reset MMX state once per frame.
+  _mm_empty();
+}
+
+void ConvertYUVToRGB32_SSE(const uint8* yplane,
+                           const uint8* uplane,
+                           const uint8* vplane,
+                           uint8* rgbframe,
+                           int width,
+                           int height,
+                           int ystride,
+                           int uvstride,
+                           int rgbstride,
+                           YUVType yuv_type) {
+  unsigned int y_shift = yuv_type;
+  for (int y = 0; y < height; ++y) {
+    uint8* rgb_row = rgbframe + y * rgbstride;
+    const uint8* y_ptr = yplane + y * ystride;
+    const uint8* u_ptr = uplane + (y >> y_shift) * uvstride;
+    const uint8* v_ptr = vplane + (y >> y_shift) * uvstride;
+    // Rows with the same (y >> y_shift) reuse the same U and V row.
+    ConvertYUVToRGB32Row_SSE(y_ptr,
+                             u_ptr,
+                             v_ptr,
+                             rgb_row,
+                             width);
+  }
+  // The row routine uses MMX registers; reset MMX state once per frame.
+  _mm_empty();
+}
+
+}  // namespace media
diff --git a/media/base/simd/filter_yuv.h b/media/base/simd/filter_yuv.h
new file mode 100644
index 0000000..5a9cf11
--- /dev/null
+++ b/media/base/simd/filter_yuv.h
@@ -0,0 +1,29 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef MEDIA_BASE_SIMD_FILTER_YUV_H_
+#define MEDIA_BASE_SIMD_FILTER_YUV_H_
+
+#include "base/basictypes.h"
+
+namespace media {
+
+typedef void (*FilterYUVRowsProc)(uint8*,
+                                  const uint8*,
+                                  const uint8*,
+                                  int,
+                                  int);
+
+void FilterYUVRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
+                     int source_width, int source_y_fraction);
+
+void FilterYUVRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
+                       int source_width, int source_y_fraction);
+
+void FilterYUVRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
+                        int source_width, int source_y_fraction);
+
+}  // namespace media
+
+#endif  // MEDIA_BASE_SIMD_FILTER_YUV_H_
diff --git a/media/base/simd/filter_yuv_c.cc b/media/base/simd/filter_yuv_c.cc
new file mode 100644
index 0000000..95ae01a
--- /dev/null
+++ b/media/base/simd/filter_yuv_c.cc
@@ -0,0 +1,29 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "media/base/simd/filter_yuv.h"
+
+namespace media {
+
+void FilterYUVRows_C(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
+                     int source_width, int source_y_fraction) {
+  // Vertically blends two rows into |ybuf|:
+  //   ybuf[i] = (y0_ptr[i] * (256 - f) + y1_ptr[i] * f) >> 8
+  // where f = |source_y_fraction| is the weight of |y1_ptr| in 1/256 units.
+  //
+  // Exactly |source_width| bytes are read from each input row and written
+  // to |ybuf|. Nothing is touched when |source_width| <= 0, and a width that
+  // is not a multiple of 8 does not run past the end of the rows, so callers
+  // do not need to pad their buffers for this implementation.
+  const int y1_fraction = source_y_fraction;
+  const int y0_fraction = 256 - y1_fraction;
+
+  for (int x = 0; x < source_width; ++x) {
+    const int top = y0_ptr[x] * y0_fraction;
+    const int bottom = y1_ptr[x] * y1_fraction;
+    ybuf[x] = static_cast<uint8>((top + bottom) >> 8);
+  }
+}
+
+}  // namespace media
diff --git a/media/base/simd/filter_yuv_mmx.cc b/media/base/simd/filter_yuv_mmx.cc
new file mode 100644
index 0000000..77698dc
--- /dev/null
+++ b/media/base/simd/filter_yuv_mmx.cc
@@ -0,0 +1,58 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#else
+#include <mmintrin.h>
+#include <emmintrin.h>
+#endif
+
+#include "build/build_config.h"
+#include "media/base/simd/filter_yuv.h"
+
+namespace media {
+
+#if defined(COMPILER_MSVC)
+// Warning 4799 is about calling emms before the function exits.
+// We call emms at the frame level, so suppress this warning.
+#pragma warning(disable: 4799)
+#endif
+
+void FilterYUVRows_MMX(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
+                       int source_width, int source_y_fraction) {
+  __m64 zero = _mm_setzero_si64();
+  __m64 y1_fraction = _mm_set1_pi16(source_y_fraction);
+  __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction);
+  // View the rows as 8-byte vectors; each loop iteration blends 8 pixels.
+  const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr);
+  const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr);
+  __m64* dest64 = reinterpret_cast<__m64*>(ybuf);
+  __m64* end64 = reinterpret_cast<__m64*>(ybuf + source_width);
+  // Runs at least once and in whole vectors, so it may pass |source_width|.
+  do {
+    __m64 y0 = *y0_ptr64++;
+    __m64 y1 = *y1_ptr64++;
+    __m64 y2 = _mm_unpackhi_pi8(y0, zero);
+    __m64 y3 = _mm_unpackhi_pi8(y1, zero);
+    y0 = _mm_unpacklo_pi8(y0, zero);
+    y1 = _mm_unpacklo_pi8(y1, zero);
+    y0 = _mm_mullo_pi16(y0, y0_fraction);
+    y1 = _mm_mullo_pi16(y1, y1_fraction);
+    y2 = _mm_mullo_pi16(y2, y0_fraction);
+    y3 = _mm_mullo_pi16(y3, y1_fraction);
+    y0 = _mm_add_pi16(y0, y1);
+    y2 = _mm_add_pi16(y2, y3);
+    y0 = _mm_srli_pi16(y0, 8);
+    y2 = _mm_srli_pi16(y2, 8);
+    y0 = _mm_packs_pu16(y0, y2);
+    *dest64++ = y0;
+  } while (dest64 < end64);
+}
+
+#if defined(COMPILER_MSVC)
+#pragma warning(default: 4799)
+#endif
+
+}  // namespace media
diff --git a/media/base/simd/filter_yuv_sse2.cc b/media/base/simd/filter_yuv_sse2.cc
new file mode 100644
index 0000000..137ac94
--- /dev/null
+++ b/media/base/simd/filter_yuv_sse2.cc
@@ -0,0 +1,49 @@
+// Copyright (c) 2011 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#else
+#include <mmintrin.h>
+#include <emmintrin.h>
+#endif
+
+#include "media/base/simd/filter_yuv.h"
+
+namespace media {
+
+void FilterYUVRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
+                        int source_width, int source_y_fraction) {
+  __m128i zero = _mm_setzero_si128();
+  __m128i y1_fraction = _mm_set1_epi16(source_y_fraction);
+  __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction);
+  // Unaligned loads; NOTE(review): the plain store assumes aligned |ybuf|?
+  const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr);
+  const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr);
+  __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf);
+  __m128i* end128 = reinterpret_cast<__m128i*>(ybuf + source_width);
+  // Runs at least once, 16 pixels per pass, so it may pass |source_width|.
+  do {
+    __m128i y0 = _mm_loadu_si128(y0_ptr128);
+    __m128i y1 = _mm_loadu_si128(y1_ptr128);
+    __m128i y2 = _mm_unpackhi_epi8(y0, zero);
+    __m128i y3 = _mm_unpackhi_epi8(y1, zero);
+    y0 = _mm_unpacklo_epi8(y0, zero);
+    y1 = _mm_unpacklo_epi8(y1, zero);
+    y0 = _mm_mullo_epi16(y0, y0_fraction);
+    y1 = _mm_mullo_epi16(y1, y1_fraction);
+    y2 = _mm_mullo_epi16(y2, y0_fraction);
+    y3 = _mm_mullo_epi16(y3, y1_fraction);
+    y0 = _mm_add_epi16(y0, y1);
+    y2 = _mm_add_epi16(y2, y3);
+    y0 = _mm_srli_epi16(y0, 8);
+    y2 = _mm_srli_epi16(y2, 8);
+    y0 = _mm_packus_epi16(y0, y2);
+    *dest128++ = y0;
+    ++y0_ptr128;
+    ++y1_ptr128;
+  } while (dest128 < end128);
+}
+
+}  // namespace media
diff --git a/media/base/simd/linear_scale_yuv_to_rgb_mmx.asm b/media/base/simd/linear_scale_yuv_to_rgb_mmx.asm
new file mode 100644
index 0000000..7f7e0e8
--- /dev/null
+++ b/media/base/simd/linear_scale_yuv_to_rgb_mmx.asm
@@ -0,0 +1,23 @@
+; Copyright (c) 2011 The Chromium Authors. All rights reserved.
+; Use of this source code is governed by a BSD-style license that can be
+; found in the LICENSE file.
+
+%include "x86inc.asm"
+
+;
+; This file uses MMX instructions.
+;
+  SECTION_TEXT
+  CPU       MMX
+
+; Use movq to save the output.
+%define MOVQ movq
+
+; void LinearScaleYUVToRGB32Row_MMX(const uint8* y_buf,
+;                                   const uint8* u_buf,
+;                                   const uint8* v_buf,
+;                                   uint8* rgb_buf,
+;                                   int width,
+;                                   int source_dx);
+%define SYMBOL LinearScaleYUVToRGB32Row_MMX
+%include "linear_scale_yuv_to_rgb_mmx.inc"
diff --git a/media/base/simd/linear_scale_yuv_to_rgb_mmx.inc b/media/base/simd/linear_scale_yuv_to_rgb_mmx.inc
new file mode 100644
index 0000000..91c06a5
--- /dev/null
+++ b/media/base/simd/linear_scale_yuv_to_rgb_mmx.inc
@@ -0,0 +1,166 @@
+; Copyright (c) 2011 The Chromium Authors. All rights reserved.
+; Use of this source code is governed by a BSD-style license that can be
+; found in the LICENSE file.
+
+  global    mangle(SYMBOL) PRIVATE
+  align     function_align
+
+mangle(SYMBOL):
+  %assign   stack_offset 0
+
+  extern    mangle(kCoefficientsRgbY)
+
+; Parameters, in order (x and source_dx are 16.16 fixed-point values):
+; 1. Y plane
+; 2. U plane
+; 3. V plane
+; 4. ARGB frame (output; the main loop stores two pixels per iteration)
+; 5. Width
+; 6. Source dx
+
+PROLOGUE  6, 7, 3, Y, R0, R1, ARGB, R2, R3, TEMP
+
+%if gprsize == 8
+%define     WORD_SIZE   QWORD
+%else
+%define     WORD_SIZE   DWORD
+%endif
+
+; Define register aliases. Several aliases map onto the same register.
+%define     Xq                  R1q     ; Current X position
+%define     COMPLq              R2q     ; Component A value
+%define     COMPLd              R2d     ; Component A value
+%define     U_ARG_REGq          R0q     ; U plane address argument
+%define     V_ARG_REGq          R1q     ; V plane address argument
+%define     SOURCE_DX_ARG_REGq  R3q     ; Source dx argument
+%define     WIDTH_ARG_REGq      R2q     ; Width argument
+
+%ifdef PIC
+; PIC code shares one register between COMPR, U and V, so each plane address
+; is reloaded from the stack before use. This frees R3q for the YUV table.
+%define     COMPRq              R0q     ; Component B value
+%define     COMPRd              R0d     ; Component B value
+%define     Uq                  R0q     ; U plane address
+%define     Vq                  R0q     ; V plane address
+%define     U_PLANE             WORD_SIZE [rsp + 3 * gprsize]
+%define     TABLE               R3q     ; Address of the table
+%else
+; Non-PIC code defines. COMPR and V share R3q; V is reloaded from the stack.
+%define     COMPRq              R3q     ; Component B value
+%define     COMPRd              R3d     ; Component B value
+%define     Uq                  R0q     ; U plane address
+%define     Vq                  R3q     ; V plane address
+%define     TABLE               mangle(kCoefficientsRgbY)
+%endif
+
+; Defines for stack variables. These are used in both PIC and non-PIC code.
+%define     V_PLANE             WORD_SIZE [rsp + 2 * gprsize]
+%define     SOURCE_DX           WORD_SIZE [rsp + gprsize]
+%define     SOURCE_WIDTH        WORD_SIZE [rsp]
+
+; Handle stack variables differently for PIC and non-PIC code.
+
+%ifdef PIC
+; Stack usage for PIC code: four values are pushed, including the U plane.
+  PUSH      U_ARG_REGq
+  PUSH      V_ARG_REGq
+  PUSH      SOURCE_DX_ARG_REGq
+  imul      WIDTH_ARG_REGq, SOURCE_DX_ARG_REGq  ; source_width = width * source_dx
+  PUSH      WIDTH_ARG_REGq
+
+; Load the address of kCoefficientsRgbY into TABLE
+  mov       TEMPq, SOURCE_DX_ARG_REGq    ; Need to save source_dx first
+  LOAD_SYM  TABLE, mangle(kCoefficientsRgbY)
+%define     SOURCE_DX_ARG_REGq  TEMPq   ; Overwrite SOURCE_DX_ARG_REGq to TEMPq
+%else
+; Stack usage for non-PIC code: only three values are pushed.
+  PUSH      V_ARG_REGq
+  PUSH      SOURCE_DX_ARG_REGq
+  imul      WIDTH_ARG_REGq, SOURCE_DX_ARG_REGq  ; source_width = width * source_dx
+  PUSH      WIDTH_ARG_REGq
+%endif
+
+%macro EPILOGUE 0
+%ifdef PIC
+  ADD       rsp, 4 * gprsize             ; Drop the four values pushed above.
+%else
+  ADD       rsp, 3 * gprsize             ; Drop the three values pushed above.
+%endif
+%endmacro
+
+  xor       Xq, Xq                       ; x = 0
+  cmp       SOURCE_DX_ARG_REGq, 0x20000  ; source_dx < 2.0 keeps x = 0
+  jl        .lscaleend
+  mov       Xq, 0x8000                   ; x = 0.5 for 1/2 or less
+  jmp       .lscaleend
+
+.lscaleloop:
+%ifdef PIC
+  mov       Uq, U_PLANE                  ; PIC code saves U_PLANE on stack.
+%endif
+
+; Define macros for scaling YUV components since they are reused.
+%macro SCALEUV 1
+  mov       TEMPq, Xq
+  sar       TEMPq, 0x11                  ; Chroma index = x >> 17
+  movzx     COMPLd, BYTE [%1 + TEMPq]    ; Left sample
+  movzx     COMPRd, BYTE [%1 + TEMPq + 1]  ; Right sample
+  mov       TEMPq, Xq
+  and       TEMPq, 0x1fffe               ; Weight of the right sample
+  imul      COMPRq, TEMPq
+  xor       TEMPq, 0x1fffe               ; Weight of the left sample
+  imul      COMPLq, TEMPq
+  add       COMPLq, COMPRq
+  shr       COMPLq, 17                   ; Blended value, used as table index
+%endmacro
+  SCALEUV   Uq                           ; Use the above macro to scale U
+  movq      mm0, [TABLE + 2048 + 8 * COMPLq]
+
+  mov       Vq, V_PLANE                  ; Read V address from stack
+  SCALEUV   Vq                           ; Use the above macro to scale V
+  paddsw    mm0, [TABLE + 4096 + 8 * COMPLq]
+
+%macro SCALEY 0
+  mov       TEMPq, Xq
+  sar       TEMPq, 0x10                   ; Luma index = x >> 16
+  movzx     COMPLd, BYTE [Yq + TEMPq]     ; Left sample
+  movzx     COMPRd, BYTE [Yq + TEMPq + 1] ; Right sample
+  mov       TEMPq, Xq
+  add       Xq, SOURCE_DX                 ; Add source_dx from stack
+  and       TEMPq, 0xffff                 ; Weight of the right sample
+  imul      COMPRq, TEMPq
+  xor       TEMPq, 0xffff                 ; Weight of the left sample
+  imul      COMPLq, TEMPq
+  add       COMPLq, COMPRq
+  shr       COMPLq, 16                    ; Blended value, used as table index
+%endmacro
+  SCALEY                                  ; Use the above macro to scale Y1
+  movq      mm1, [TABLE + 8 * COMPLq]
+
+  cmp       Xq, SOURCE_WIDTH              ; Compare source_width from stack
+  jge       .lscalelastpixel
+
+  SCALEY                                  ; Use the above macro to scale Y2
+  movq      mm2, [TABLE + 8 * COMPLq]
+
+  paddsw    mm1, mm0                     ; Pixel 1 = Y1 entry + UV entries
+  paddsw    mm2, mm0                     ; Pixel 2 = Y2 entry + same UV entries
+  psraw     mm1, 0x6
+  psraw     mm2, 0x6
+  packuswb  mm1, mm2                     ; Saturate both pixels to bytes
+  MOVQ      [ARGBq], mm1                 ; Store 8 bytes (two pixels)
+  add       ARGBq, 0x8
+
+.lscaleend:
+  cmp       Xq, SOURCE_WIDTH     ; Compare source_width from stack
+  jl        .lscaleloop
+  EPILOGUE
+  RET
+
+.lscalelastpixel:
+  paddsw    mm1, mm0                     ; Only Y1 is left: build one pixel
+  psraw     mm1, 6
+  packuswb  mm1, mm1
+  movd      [ARGBq], mm1                 ; Store 4 bytes (one pixel)
+  EPILOGUE
+  RET
diff --git a/media/base/simd/linear_scale_yuv_to_rgb_mmx_x64.asm b/media/base/simd/linear_scale_yuv_to_rgb_mmx_x64.asm
new file mode 100644
index 0000000..db7854457
--- /dev/null
+++ b/media/base/simd/linear_scale_yuv_to_rgb_mmx_x64.asm
@@ -0,0 +1,142 @@
+; Copyright (c) 2011 The Chromium Authors. All rights reserved.
+; Use of this source code is governed by a BSD-style license that can be
+; found in the LICENSE file.
+
+%include "x86inc.asm"
+
+;
+; This file uses MMX instructions.
+;
+  SECTION_TEXT
+  CPU       MMX
+
+%define SYMBOL LinearScaleYUVToRGB32Row_MMX_X64
+  global    mangle(SYMBOL) PRIVATE
+  align     function_align
+
+mangle(SYMBOL):
+  %assign   stack_offset 0
+  extern    mangle(kCoefficientsRgbY)
+
+; Parameters, in order (x and source_dx are 16.16 fixed-point values):
+; 1. Y plane
+; 2. U plane
+; 3. V plane
+; 4. ARGB frame (output; the main loop stores two pixels per iteration)
+; 5. Width
+; 6. Source dx
+; NOTE(review): 5 and 6 are read as full 64-bit registers; confirm upper bits.
+PROLOGUE  6, 7, 3, Y, U, V, ARGB, WIDTH, SOURCE_DX, COMPL
+
+%define     TABLEq     r10               ; Address of kCoefficientsRgbY
+%define     Xq         r11               ; Current X position
+%define     INDEXq     r12               ; Sample index / next X position
+%define     COMPRd     r13d              ; Right sample
+%define     COMPRq     r13
+%define     FRACTIONq  r14               ; Interpolation weight
+
+  PUSH      TABLEq
+  PUSH      Xq
+  PUSH      INDEXq
+  PUSH      COMPRq
+  PUSH      FRACTIONq
+
+%macro EPILOGUE 0
+  POP       FRACTIONq                    ; Restore in reverse push order.
+  POP       COMPRq
+  POP       INDEXq
+  POP       Xq
+  POP       TABLEq
+%endmacro
+
+  LOAD_SYM  TABLEq, mangle(kCoefficientsRgbY)
+
+  imul      WIDTHq, SOURCE_DXq           ; source_width = width * source_dx
+  xor       Xq, Xq                       ; x = 0
+  cmp       SOURCE_DXq, 0x20000          ; source_dx < 2.0 keeps x = 0
+  jl        .lscaleend
+  mov       Xq, 0x8000                   ; x = 0.5 for 1/2 or less
+  jmp       .lscaleend
+
+.lscaleloop:
+  ; Interpolate U
+  mov       INDEXq, Xq
+  sar       INDEXq, 0x11                 ; Chroma index = x >> 17
+  movzx     COMPLd, BYTE [Uq + INDEXq]
+  movzx     COMPRd, BYTE [Uq + INDEXq + 1]
+  mov       FRACTIONq, Xq
+  and       FRACTIONq, 0x1fffe           ; Weight of the right sample
+  imul      COMPRq, FRACTIONq
+  xor       FRACTIONq, 0x1fffe           ; Weight of the left sample
+  imul      COMPLq, FRACTIONq
+  add       COMPLq, COMPRq
+  shr       COMPLq, 17
+  movq      mm0, [TABLEq + 2048 + 8 * COMPLq]
+
+  ; Interpolate V
+  movzx     COMPLd, BYTE [Vq + INDEXq]
+  movzx     COMPRd, BYTE [Vq + INDEXq + 1]
+  ; FRACTIONq still holds the left weight from U, so multiply COMPL first
+  ; and then flip it back for COMPR. This saves two instructions.
+  imul      COMPLq, FRACTIONq
+  xor       FRACTIONq, 0x1fffe
+  imul      COMPRq, FRACTIONq
+  add       COMPLq, COMPRq
+  shr       COMPLq, 17
+  paddsw    mm0, [TABLEq + 4096 + 8 * COMPLq]
+
+  ; Interpolate first Y1.
+  lea       INDEXq, [Xq + SOURCE_DXq]   ; INDEXq now points to next pixel.
+                                        ; Xq points to current pixel.
+  mov       FRACTIONq, Xq
+  sar       Xq, 0x10                    ; Luma index = x >> 16
+  movzx     COMPLd, BYTE [Yq + Xq]
+  movzx     COMPRd, BYTE [Yq + Xq + 1]
+  and       FRACTIONq, 0xffff           ; Weight of the right sample
+  imul      COMPRq, FRACTIONq
+  xor       FRACTIONq, 0xffff           ; Weight of the left sample
+  imul      COMPLq, FRACTIONq
+  add       COMPLq, COMPRq
+  shr       COMPLq, 16
+  movq      mm1, [TABLEq + 8 * COMPLq]
+
+  ; Interpolate Y2 if available.
+  cmp       INDEXq, WIDTHq
+  jge       .lscalelastpixel
+
+  lea       Xq, [INDEXq + SOURCE_DXq]    ; Xq points to next pixel.
+                                         ; INDEXq points to current pixel.
+  mov       FRACTIONq, INDEXq
+  sar       INDEXq, 0x10
+  movzx     COMPLd, BYTE [Yq + INDEXq]
+  movzx     COMPRd, BYTE [Yq + INDEXq + 1]
+  and       FRACTIONq, 0xffff
+  imul      COMPRq, FRACTIONq
+  xor       FRACTIONq, 0xffff
+  imul      COMPLq, FRACTIONq
+  add       COMPLq, COMPRq
+  shr       COMPLq, 16
+  movq      mm2, [TABLEq + 8 * COMPLq]
+
+  paddsw    mm1, mm0                     ; Pixel 1 = Y1 entry + UV entries
+  paddsw    mm2, mm0                     ; Pixel 2 = Y2 entry + same UV entries
+  psraw     mm1, 0x6
+  psraw     mm2, 0x6
+  packuswb  mm1, mm2                     ; Saturate both pixels to bytes
+  movntq    [ARGBq], mm1                 ; Non-temporal store of two pixels
+  add       ARGBq, 0x8
+
+.lscaleend:
+  cmp       Xq, WIDTHq
+  jl        .lscaleloop
+  jmp       .epilogue
+
+.lscalelastpixel:
+  paddsw    mm1, mm0                     ; Only Y1 is left: build one pixel
+  psraw     mm1, 6
+  packuswb  mm1, mm1
+  movd      [ARGBq], mm1                 ; Store 4 bytes (one pixel)
+
+.epilogue:
+  EPILOGUE
+  RET
diff --git a/media/base/simd/linear_scale_yuv_to_rgb_sse.asm b/media/base/simd/linear_scale_yuv_to_rgb_sse.asm
new file mode 100644
index 0000000..847911c
--- /dev/null
+++ b/media/base/simd/linear_scale_yuv_to_rgb_sse.asm
@@ -0,0 +1,23 @@
+; Copyright (c) 2011 The Chromium Authors. All rights reserved.
+; Use of this source code is governed by a BSD-style license that can be
+; found in the LICENSE file.
+
+%include "x86inc.asm"
+
+;
+; This file uses MMX and SSE instructions.
+;
+  SECTION_TEXT
+  CPU       MMX, SSE
+
+; Use movntq (non-temporal store) to save the output.
+%define MOVQ movntq
+
+; void LinearScaleYUVToRGB32Row_SSE(const uint8* y_buf,
+;                                   const uint8* u_buf,
+;                                   const uint8* v_buf,
+;                                   uint8* rgb_buf,
+;                                   int width,
+;                                   int source_dx);
+%define SYMBOL LinearScaleYUVToRGB32Row_SSE
+%include "linear_scale_yuv_to_rgb_mmx.inc"
diff --git a/media/base/simd/scale_yuv_to_rgb_mmx.asm b/media/base/simd/scale_yuv_to_rgb_mmx.asm
new file mode 100644
index 0000000..6a83757
--- /dev/null
+++ b/media/base/simd/scale_yuv_to_rgb_mmx.asm
@@ -0,0 +1,23 @@
+; Copyright (c) 2011 The Chromium Authors. All rights reserved.
+; Use of this source code is governed by a BSD-style license that can be
+; found in the LICENSE file.
+
+%include "x86inc.asm"
+
+;
+; This file uses MMX instructions.
+;
+  SECTION_TEXT
+  CPU       MMX
+
+; Use movq to save the output.
+%define MOVQ movq
+
+; void ScaleYUVToRGB32Row_MMX(const uint8* y_buf,
+;                             const uint8* u_buf,
+;                             const uint8* v_buf,
+;                             uint8* rgb_buf,
+;                             int width,
+;                             int source_dx);
+%define SYMBOL ScaleYUVToRGB32Row_MMX
+%include "scale_yuv_to_rgb_mmx.inc"
diff --git a/media/base/simd/scale_yuv_to_rgb_mmx.inc b/media/base/simd/scale_yuv_to_rgb_mmx.inc
new file mode 100644
index 0000000..94c101c
--- /dev/null
+++ b/media/base/simd/scale_yuv_to_rgb_mmx.inc
@@ -0,0 +1,115 @@
+; Copyright (c) 2011 The Chromium Authors. All rights reserved.
+; Use of this source code is governed by a BSD-style license that can be
+; found in the LICENSE file.
+
+  global    mangle(SYMBOL) PRIVATE
+  align     function_align
+
+mangle(SYMBOL):
+  %assign   stack_offset 0
+
+  extern    mangle(kCoefficientsRgbY)
+
+; Parameters, in order (x and source_dx are 16.16 fixed-point values):
+; 1. Y plane
+; 2. U plane
+; 3. V plane
+; 4. ARGB frame (output; the main loop stores two pixels per iteration)
+; 5. Width
+; 6. Source dx
+
+PROLOGUE  6, 7, 3, Y, U, V, ARGB, R1, R2, TEMP
+
+%ifdef ARCH_X86_64
+%define     WORD_SIZE   QWORD
+%else
+%define     WORD_SIZE   DWORD
+%endif
+
+%ifdef PIC
+  PUSH      R1q  ; Width (R1q is reused for the table address below)
+%endif
+  PUSH      R2q  ; Source dx (R2q is reused for the X position below)
+
+%define     SOURCE_DX   WORD_SIZE [rsp]
+
+; PIC code.
+%ifdef PIC
+  LOAD_SYM  R1q, mangle(kCoefficientsRgbY)
+%define     WIDTH       WORD_SIZE [rsp + gprsize]
+%define     TABLE       R1q
+%define     Xq           R2q
+
+; Non-PIC code.
+%else
+%define     WIDTH       R1q
+%define     TABLE       mangle(kCoefficientsRgbY)
+%define     Xq           R2q
+%endif
+
+  ; Set Xq index to 0.
+  xor       Xq, Xq
+  jmp       .scaleend
+
+.scaleloop:
+  ; TABLE is either a register or a symbol, depending on whether this is
+  ; PIC code or not.
+  mov       TEMPq, Xq
+  sar       TEMPq, 17                    ; Chroma index = x >> 17
+  movzx     TEMPd, BYTE [Uq + TEMPq]
+  movq      mm0, [TABLE + 2048 + 8 * TEMPq]
+  mov       TEMPq, Xq
+  sar       TEMPq, 17
+  movzx     TEMPd, BYTE [Vq + TEMPq]
+  paddsw    mm0, [TABLE + 4096 + 8 * TEMPq]
+  mov       TEMPq, Xq
+  add       Xq, SOURCE_DX                ; Advance x to the second pixel
+  sar       TEMPq, 16                    ; Luma index = x >> 16
+  movzx     TEMPd, BYTE [Yq + TEMPq]
+  movq      mm1, [TABLE + 8 * TEMPq]
+  mov       TEMPq, Xq
+  add       Xq, SOURCE_DX                ; Advance x to the next pair
+  sar       TEMPq, 16
+  movzx     TEMPd, BYTE [Yq + TEMPq]
+  movq      mm2, [TABLE + 8 * TEMPq]
+  paddsw    mm1, mm0                     ; Pixel 1 = Y1 entry + UV entries
+  paddsw    mm2, mm0                     ; Pixel 2 = Y2 entry + same UV entries
+  psraw     mm1, 6
+  psraw     mm2, 6
+  packuswb  mm1, mm2                     ; Saturate both pixels to bytes
+  MOVQ      QWORD [ARGBq], mm1           ; Store 8 bytes (two pixels)
+  add       ARGBq, 8
+
+.scaleend:
+  ; WIDTH is either a register or a stack slot, depending on whether this is
+  ; PIC code or not. Loop while at least two pixels remain.
+  sub       WIDTH, 2
+  jns       .scaleloop
+
+  and       WIDTH, 1             ; odd number of pixels?
+  jz        .scaledone
+
+  mov       TEMPq, Xq
+  sar       TEMPq, 17
+  movzx     TEMPd, BYTE [Uq + TEMPq]
+  movq      mm0, [TABLE + 2048 + 8 * TEMPq]
+  mov       TEMPq, Xq
+  sar       TEMPq, 17
+  movzx     TEMPd, BYTE [Vq + TEMPq]
+  paddsw    mm0, [TABLE + 4096 + 8 * TEMPq]
+  mov       TEMPq, Xq
+  sar       TEMPq, 16
+  movzx     TEMPd, BYTE [Yq + TEMPq]
+  movq      mm1, [TABLE + 8 * TEMPq]
+  paddsw    mm1, mm0
+  psraw     mm1, 6
+  packuswb  mm1, mm1
+  movd      DWORD [ARGBq], mm1           ; Store 4 bytes (the last pixel)
+
+.scaledone:
+%ifdef PIC
+  ADD       rsp, 2 * gprsize             ; Drop the pushed width and source dx.
+%else
+  ADD       rsp, gprsize                 ; Drop the pushed source dx.
+%endif
+  RET
diff --git a/media/base/simd/scale_yuv_to_rgb_sse.asm b/media/base/simd/scale_yuv_to_rgb_sse.asm
new file mode 100644
index 0000000..bdd5625
--- /dev/null
+++ b/media/base/simd/scale_yuv_to_rgb_sse.asm
@@ -0,0 +1,31 @@
+; Copyright (c) 2011 The Chromium Authors. All rights reserved.
+; Use of this source code is governed by a BSD-style license that can be
+; found in the LICENSE file.
+
+%include "x86inc.asm"
+
+;
+; This file uses MMX and SSE instructions.
+;
+  SECTION_TEXT
+  CPU       MMX, SSE
+
+; Use movntq (non-temporal store) to save the output.
+%define MOVQ movntq
+
+; void ScaleYUVToRGB32Row_SSE(const uint8* y_buf,
+;                             const uint8* u_buf,
+;                             const uint8* v_buf,
+;                             uint8* rgb_buf,
+;                             int width,
+;                             int source_dx);
+%define SYMBOL ScaleYUVToRGB32Row_SSE
+%include "scale_yuv_to_rgb_mmx.inc"
+
+; void LinearScaleYUVToRGB32Row_SSE(const uint8* y_buf,
+;                                   const uint8* u_buf,
+;                                   const uint8* v_buf,
+;                                   uint8* rgb_buf,
+;                                   int width,
+;                                   int source_dx);
+
diff --git a/media/base/simd/scale_yuv_to_rgb_sse2_x64.asm b/media/base/simd/scale_yuv_to_rgb_sse2_x64.asm
new file mode 100644
index 0000000..e021457
--- /dev/null
+++ b/media/base/simd/scale_yuv_to_rgb_sse2_x64.asm
@@ -0,0 +1,109 @@
+; Copyright (c) 2011 The Chromium Authors. All rights reserved.
+; Use of this source code is governed by a BSD-style license that can be
+; found in the LICENSE file.
+
+%include "x86inc.asm"
+
+;
+; This file uses SSE2 instructions.
+;
+  SECTION_TEXT
+  CPU       SSE2
+
+; void ScaleYUVToRGB32Row_SSE2_X64(const uint8* y_buf,
+;                                  const uint8* u_buf,
+;                                  const uint8* v_buf,
+;                                  uint8* rgb_buf,
+;                                  int width,
+;                                  int source_dx);
+%define SYMBOL ScaleYUVToRGB32Row_SSE2_X64
+
+  global    mangle(SYMBOL) PRIVATE
+  align     function_align
+
+mangle(SYMBOL):
+  %assign   stack_offset 0
+  extern    mangle(kCoefficientsRgbY)
+
+; Parameters, in order (x and source_dx are 16.16 fixed-point values):
+; 1. Y plane
+; 2. U plane
+; 3. V plane
+; 4. ARGB frame (output; the main loop stores two pixels per iteration)
+; 5. Width
+; 6. Source dx
+
+PROLOGUE  6, 7, 3, Y, U, V, ARGB, WIDTH, SOURCE_DX, COMP
+
+%define     TABLEq   r10               ; Address of kCoefficientsRgbY
+%define     Xq       r11               ; Current X position
+%define     INDEXq   r12               ; Sample index / next X position
+  PUSH      r10
+  PUSH      r11
+  PUSH      r12
+
+  LOAD_SYM  TABLEq, mangle(kCoefficientsRgbY)
+
+  ; Set Xq index to 0.
+  xor       Xq, Xq
+  jmp       .scaleend
+
+.scaleloop:
+  ; Read UV pixels.
+  mov       INDEXq, Xq
+  sar       INDEXq, 17                ; Chroma index = x >> 17
+  movzx     COMPd, BYTE [Uq + INDEXq]
+  movq      xmm0, [TABLEq + 2048 + 8 * COMPq]
+  movzx     COMPd, BYTE [Vq + INDEXq]
+  movq      xmm1, [TABLEq + 4096 + 8 * COMPq]
+
+  ; Read first Y pixel.
+  lea       INDEXq, [Xq + SOURCE_DXq] ; INDEXq now points to next pixel.
+  sar       Xq, 16                    ; Luma index = x >> 16
+  movzx     COMPd, BYTE [Yq + Xq]
+  paddsw    xmm0, xmm1                ; Hide an ADD after memory load.
+  movq      xmm1, [TABLEq + 8 * COMPq]
+
+  ;  Read next Y pixel.
+  lea       Xq, [INDEXq + SOURCE_DXq] ; Xq now points to next pixel.
+  sar       INDEXq, 16
+  movzx     COMPd, BYTE [Yq + INDEXq]
+  movq      xmm2, [TABLEq + 8 * COMPq]
+  paddsw    xmm1, xmm0                ; Pixel 1 = Y1 entry + UV entries
+  paddsw    xmm2, xmm0                ; Pixel 2 = Y2 entry + same UV entries
+  shufps    xmm1, xmm2, 0x44          ; Join two pixels into one XMM register
+  psraw     xmm1, 6
+  packuswb  xmm1, xmm1                ; Saturate both pixels to bytes
+  movq      QWORD [ARGBq], xmm1       ; Store 8 bytes (two pixels)
+  add       ARGBq, 8
+
+.scaleend:
+  sub       WIDTHq, 2                 ; Loop while at least two pixels remain.
+  jns       .scaleloop
+
+  and       WIDTHq, 1                 ; odd number of pixels?
+  jz        .scaledone
+
+  ; Read U V components.
+  mov       INDEXq, Xq
+  sar       INDEXq, 17
+  movzx     COMPd, BYTE [Uq + INDEXq]
+  movq      xmm0, [TABLEq + 2048 + 8 * COMPq]
+  movzx     COMPd, BYTE [Vq + INDEXq]
+  paddsw    xmm0, [TABLEq + 4096 + 8 * COMPq]
+
+  ; Read one Y component.
+  mov       INDEXq, Xq
+  sar       INDEXq, 16
+  movzx     COMPd, BYTE [Yq + INDEXq]
+  movq      xmm1, [TABLEq + 8 * COMPq]
+  paddsw    xmm1, xmm0
+  psraw     xmm1, 6
+  packuswb  xmm1, xmm1
+  movd      DWORD [ARGBq], xmm1       ; Store 4 bytes (the last pixel)
+
+.scaledone:
+  POP       r12                       ; Restore in reverse push order.
+  POP       r11
+  POP       r10
+  RET
diff --git a/media/base/simd/x86inc.asm b/media/base/simd/x86inc.asm
index 956b999..5e0ca20 100644
--- a/media/base/simd/x86inc.asm
+++ b/media/base/simd/x86inc.asm
@@ -95,11 +95,14 @@
 %ifdef WIN64
     %define PIC
 %elifndef ARCH_X86_64
+; For chromium we may build PIC code even for 32 bits system.
+%ifndef CHROMIUM
 ; x86_32 doesn't require PIC.
 ; Some distros prefer shared objects to be PIC, but nothing breaks if
 ; the code contains a few textrels, so we'll skip that complexity.
     %undef PIC
 %endif
+%endif
 %ifdef PIC
     default rel
 %endif
@@ -947,6 +950,11 @@ AVX_INSTR pfmul, 1, 0
 ;=============================================================================
 
 %ifdef CHROMIUM
+; Always build PIC code on Mac for Chromium.
+%ifdef MACHO
+%define PIC
+%endif
+
 ;
 ; LOAD_SYM %1 (reg), %2 (sym)
 ; Copies the address to a local symbol to the specified register.
diff --git a/media/base/yuv_convert.cc b/media/base/yuv_convert.cc
index cbf7f57..22f1a24 100644
--- a/media/base/yuv_convert.cc
+++ b/media/base/yuv_convert.cc
@@ -17,151 +17,94 @@
 
 #include "media/base/yuv_convert.h"
 
+#include "base/logging.h"
 #include "build/build_config.h"
 #include "media/base/cpu_features.h"
 #include "media/base/simd/convert_rgb_to_yuv.h"
+#include "media/base/simd/convert_yuv_to_rgb.h"
+#include "media/base/simd/filter_yuv.h"
 #include "media/base/yuv_convert_internal.h"
 #include "media/base/yuv_row.h"
 
-#if USE_MMX
+#if defined(ARCH_CPU_X86_FAMILY)
 #if defined(_MSC_VER)
 #include <intrin.h>
 #else
+#include <emmintrin.h>
 #include <mmintrin.h>
 #endif
 #endif
 
-#if USE_SSE2
-#include <emmintrin.h>
-#endif
-
 namespace media {
 
-// 16.16 fixed point arithmetic
-const int kFractionBits = 16;
-const int kFractionMax = 1 << kFractionBits;
-const int kFractionMask = ((1 << kFractionBits) - 1);
-
-// Convert a frame of YUV to 32 bit ARGB.
-void ConvertYUVToRGB32(const uint8* y_buf,
-                       const uint8* u_buf,
-                       const uint8* v_buf,
-                       uint8* rgb_buf,
-                       int width,
-                       int height,
-                       int y_pitch,
-                       int uv_pitch,
-                       int rgb_pitch,
-                       YUVType yuv_type) {
-  unsigned int y_shift = yuv_type;
-  for (int y = 0; y < height; ++y) {
-    uint8* rgb_row = rgb_buf + y * rgb_pitch;
-    const uint8* y_ptr = y_buf + y * y_pitch;
-    const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch;
-    const uint8* v_ptr = v_buf + (y >> y_shift) * uv_pitch;
-
-    FastConvertYUVToRGB32Row(y_ptr,
-                             u_ptr,
-                             v_ptr,
-                             rgb_row,
-                             width);
-  }
-
-  // MMX used for FastConvertYUVToRGB32Row requires emms instruction.
-  EMMS();
+static FilterYUVRowsProc ChooseFilterYUVRowsProc() {
+#if !defined(ARCH_CPU_X86_FAMILY)
+  return &FilterYUVRows_C;  // Non-x86 builds use the C row filter.
+#else
+  if (hasSSE2()) return &FilterYUVRows_SSE2;  // Checked first.
+  if (hasMMX()) return &FilterYUVRows_MMX;    // Checked second.
+  return &FilterYUVRows_C;  // x86 CPU reporting neither SSE2 nor MMX.
+#endif
 }
 
-#if USE_SSE2
-// FilterRows combines two rows of the image using linear interpolation.
-// SSE2 version does 16 pixels at a time
-
-static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
-                       int source_width, int source_y_fraction) {
-  __m128i zero = _mm_setzero_si128();
-  __m128i y1_fraction = _mm_set1_epi16(source_y_fraction);
-  __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction);
-
-  const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr);
-  const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr);
-  __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf);
-  __m128i* end128 = reinterpret_cast<__m128i*>(ybuf + source_width);
-
-  do {
-    __m128i y0 = _mm_loadu_si128(y0_ptr128);
-    __m128i y1 = _mm_loadu_si128(y1_ptr128);
-    __m128i y2 = _mm_unpackhi_epi8(y0, zero);
-    __m128i y3 = _mm_unpackhi_epi8(y1, zero);
-    y0 = _mm_unpacklo_epi8(y0, zero);
-    y1 = _mm_unpacklo_epi8(y1, zero);
-    y0 = _mm_mullo_epi16(y0, y0_fraction);
-    y1 = _mm_mullo_epi16(y1, y1_fraction);
-    y2 = _mm_mullo_epi16(y2, y0_fraction);
-    y3 = _mm_mullo_epi16(y3, y1_fraction);
-    y0 = _mm_add_epi16(y0, y1);
-    y2 = _mm_add_epi16(y2, y3);
-    y0 = _mm_srli_epi16(y0, 8);
-    y2 = _mm_srli_epi16(y2, 8);
-    y0 = _mm_packus_epi16(y0, y2);
-    *dest128++ = y0;
-    ++y0_ptr128;
-    ++y1_ptr128;
-  } while (dest128 < end128);
+static ConvertYUVToRGB32RowProc ChooseConvertYUVToRGB32RowProc() {
+#if !defined(ARCH_CPU_X86_FAMILY)
+  return &ConvertYUVToRGB32Row_C;  // Non-x86 builds use the C row converter.
+#else
+  if (hasSSE()) return &ConvertYUVToRGB32Row_SSE;  // Checked first.
+  if (hasMMX()) return &ConvertYUVToRGB32Row_MMX;  // Checked second.
+  return &ConvertYUVToRGB32Row_C;  // x86 CPU reporting neither SSE nor MMX.
+#endif
 }
-#elif USE_MMX
-// MMX version does 8 pixels at a time
-static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
-                       int source_width, int source_y_fraction) {
-  __m64 zero = _mm_setzero_si64();
-  __m64 y1_fraction = _mm_set1_pi16(source_y_fraction);
-  __m64 y0_fraction = _mm_set1_pi16(256 - source_y_fraction);
-
-  const __m64* y0_ptr64 = reinterpret_cast<const __m64*>(y0_ptr);
-  const __m64* y1_ptr64 = reinterpret_cast<const __m64*>(y1_ptr);
-  __m64* dest64 = reinterpret_cast<__m64*>(ybuf);
-  __m64* end64 = reinterpret_cast<__m64*>(ybuf + source_width);
-
-  do {
-    __m64 y0 = *y0_ptr64++;
-    __m64 y1 = *y1_ptr64++;
-    __m64 y2 = _mm_unpackhi_pi8(y0, zero);
-    __m64 y3 = _mm_unpackhi_pi8(y1, zero);
-    y0 = _mm_unpacklo_pi8(y0, zero);
-    y1 = _mm_unpacklo_pi8(y1, zero);
-    y0 = _mm_mullo_pi16(y0, y0_fraction);
-    y1 = _mm_mullo_pi16(y1, y1_fraction);
-    y2 = _mm_mullo_pi16(y2, y0_fraction);
-    y3 = _mm_mullo_pi16(y3, y1_fraction);
-    y0 = _mm_add_pi16(y0, y1);
-    y2 = _mm_add_pi16(y2, y3);
-    y0 = _mm_srli_pi16(y0, 8);
-    y2 = _mm_srli_pi16(y2, 8);
-    y0 = _mm_packs_pu16(y0, y2);
-    *dest64++ = y0;
-  } while (dest64 < end64);
+
+static ScaleYUVToRGB32RowProc ChooseScaleYUVToRGB32RowProc() {
+#if defined(ARCH_CPU_X86_64)
+  return &ScaleYUVToRGB32Row_SSE2_X64;  // Use 64-bits version if possible.
+#else  // Not x86-64; keeps the code below reachable in every build.
+#if defined(ARCH_CPU_X86_FAMILY)
+  // Choose the best one on 32-bits system.
+  if (hasSSE())
+    return &ScaleYUVToRGB32Row_SSE;
+  if (hasMMX())
+    return &ScaleYUVToRGB32Row_MMX;
+#endif  // defined(ARCH_CPU_X86_FAMILY)
+  return &ScaleYUVToRGB32Row_C;  // Portable fallback.
+#endif  // defined(ARCH_CPU_X86_64)
 }
-#else  // no MMX or SSE2
-// C version does 8 at a time to mimic MMX code
-static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
-                       int source_width, int source_y_fraction) {
-  int y1_fraction = source_y_fraction;
-  int y0_fraction = 256 - y1_fraction;
-  uint8* end = ybuf + source_width;
-  do {
-    ybuf[0] = (y0_ptr[0] * y0_fraction + y1_ptr[0] * y1_fraction) >> 8;
-    ybuf[1] = (y0_ptr[1] * y0_fraction + y1_ptr[1] * y1_fraction) >> 8;
-    ybuf[2] = (y0_ptr[2] * y0_fraction + y1_ptr[2] * y1_fraction) >> 8;
-    ybuf[3] = (y0_ptr[3] * y0_fraction + y1_ptr[3] * y1_fraction) >> 8;
-    ybuf[4] = (y0_ptr[4] * y0_fraction + y1_ptr[4] * y1_fraction) >> 8;
-    ybuf[5] = (y0_ptr[5] * y0_fraction + y1_ptr[5] * y1_fraction) >> 8;
-    ybuf[6] = (y0_ptr[6] * y0_fraction + y1_ptr[6] * y1_fraction) >> 8;
-    ybuf[7] = (y0_ptr[7] * y0_fraction + y1_ptr[7] * y1_fraction) >> 8;
-    y0_ptr += 8;
-    y1_ptr += 8;
-    ybuf += 8;
-  } while (ybuf < end);
+
+static ScaleYUVToRGB32RowProc ChooseLinearScaleYUVToRGB32RowProc() {
+#if defined(ARCH_CPU_X86_64)
+  return &LinearScaleYUVToRGB32Row_MMX_X64;  // Use 64-bits version if possible.
+#else  // Not x86-64; keeps the code below reachable in every build.
+#if defined(ARCH_CPU_X86_FAMILY)
+  // 32-bits system.
+  if (hasSSE())
+    return &LinearScaleYUVToRGB32Row_SSE;
+  if (hasMMX())
+    return &LinearScaleYUVToRGB32Row_MMX;
+#endif  // defined(ARCH_CPU_X86_FAMILY)
+  return &LinearScaleYUVToRGB32Row_C;  // Portable fallback.
+#endif  // defined(ARCH_CPU_X86_64)
 }
+
+// Calls _mm_empty() on x86 CPUs that report MMX; does nothing elsewhere.
+void EmptyRegisterState() {
+#if defined(ARCH_CPU_X86_FAMILY)
+  static bool mmx_probed = false;
+  static bool mmx_present = false;
+  if (!mmx_probed) {  // First call: query the CPU and remember the answer.
+    mmx_present = hasMMX();
+    mmx_probed = true;
+  }
+  if (mmx_present)
+    _mm_empty();
 #endif
+}
 
+// 16.16 fixed point arithmetic
+const int kFractionBits = 16;
+const int kFractionMax = 1 << kFractionBits;
+const int kFractionMask = ((1 << kFractionBits) - 1);
 
 // Scale a frame of YUV to 32 bit ARGB.
 void ScaleYUVToRGB32(const uint8* y_buf,
@@ -178,6 +121,20 @@ void ScaleYUVToRGB32(const uint8* y_buf,
                      YUVType yuv_type,
                      Rotate view_rotate,
                      ScaleFilter filter) {
+  static FilterYUVRowsProc filter_proc = NULL;
+  static ConvertYUVToRGB32RowProc convert_proc = NULL;
+  static ScaleYUVToRGB32RowProc scale_proc = NULL;
+  static ScaleYUVToRGB32RowProc linear_scale_proc = NULL;
+
+  if (!filter_proc)
+    filter_proc = ChooseFilterYUVRowsProc();
+  if (!convert_proc)
+    convert_proc = ChooseConvertYUVToRGB32RowProc();
+  if (!scale_proc)
+    scale_proc = ChooseScaleYUVToRGB32RowProc();
+  if (!linear_scale_proc)
+    linear_scale_proc = ChooseLinearScaleYUVToRGB32RowProc();
+
   // Handle zero sized sources and destinations.
   if ((yuv_type == YV12 && (source_width < 2 || source_height < 2)) ||
       (yuv_type == YV16 && (source_width < 2 || source_height < 1)) ||
@@ -225,9 +182,6 @@ void ScaleYUVToRGB32(const uint8* y_buf,
 
   int source_dx = source_width * kFractionMax / width;
   int source_dy = source_height * kFractionMax / height;
-#if USE_MMX && defined(_MSC_VER)
-  int source_dx_uv = source_dx;
-#endif
 
   if ((view_rotate == ROTATE_90) ||
       (view_rotate == ROTATE_270)) {
@@ -240,9 +194,6 @@ void ScaleYUVToRGB32(const uint8* y_buf,
     int original_dx = source_dx;
     int original_dy = source_dy;
     source_dx = ((original_dy >> kFractionBits) * y_pitch) << kFractionBits;
-#if USE_MMX && defined(_MSC_VER)
-    source_dx_uv = ((original_dy >> kFractionBits) * uv_pitch) << kFractionBits;
-#endif
     source_dy = original_dx;
     if (view_rotate == ROTATE_90) {
       y_pitch = -1;
@@ -294,7 +245,7 @@ void ScaleYUVToRGB32(const uint8* y_buf,
     if (filter & media::FILTER_BILINEAR_V) {
       if (yscale_fixed != kFractionMax &&
           source_y_fraction && ((source_y + 1) < source_height)) {
-        FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
+        filter_proc(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
       } else {
         memcpy(ybuf, y0_ptr, source_width);
       }
@@ -304,8 +255,8 @@ void ScaleYUVToRGB32(const uint8* y_buf,
       if (yscale_fixed != kFractionMax &&
           source_uv_fraction &&
           (((source_y >> y_shift) + 1) < (source_height >> y_shift))) {
-        FilterRows(ubuf, u0_ptr, u1_ptr, uv_source_width, source_uv_fraction);
-        FilterRows(vbuf, v0_ptr, v1_ptr, uv_source_width, source_uv_fraction);
+        filter_proc(ubuf, u0_ptr, u1_ptr, uv_source_width, source_uv_fraction);
+        filter_proc(vbuf, v0_ptr, v1_ptr, uv_source_width, source_uv_fraction);
       } else {
         memcpy(ubuf, u0_ptr, uv_source_width);
         memcpy(vbuf, v0_ptr, uv_source_width);
@@ -316,41 +267,17 @@ void ScaleYUVToRGB32(const uint8* y_buf,
       vbuf[uv_source_width] = vbuf[uv_source_width - 1];
     }
     if (source_dx == kFractionMax) {  // Not scaled
-      FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-                               dest_pixel, width);
+      convert_proc(y_ptr, u_ptr, v_ptr, dest_pixel, width);
     } else {
       if (filter & FILTER_BILINEAR_H) {
-        LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-                                 dest_pixel, width, source_dx);
-    } else {
-// Specialized scalers and rotation.
-#if USE_MMX && defined(_MSC_VER)
-        if (width == (source_width * 2)) {
-          DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-                              dest_pixel, width);
-        } else if ((source_dx & kFractionMask) == 0) {
-          // Scaling by integer scale factor. ie half.
-          ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-                               dest_pixel, width,
-                               source_dx >> kFractionBits);
-        } else if (source_dx_uv == source_dx) {  // Not rotated.
-          ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-                             dest_pixel, width, source_dx);
-        } else {
-          RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-                                     dest_pixel, width,
-                                     source_dx >> kFractionBits,
-                                     source_dx_uv >> kFractionBits);
-        }
-#else
-        ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
-                           dest_pixel, width, source_dx);
-#endif
+        linear_scale_proc(y_ptr, u_ptr, v_ptr, dest_pixel, width, source_dx);
+      } else {
+        scale_proc(y_ptr, u_ptr, v_ptr, dest_pixel, width, source_dx);
       }
     }
   }
-  // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms.
-  EMMS();
+
+  EmptyRegisterState();
 }
 
 void ConvertRGB32ToYUV(const uint8* rgbframe,
@@ -371,7 +298,9 @@ void ConvertRGB32ToYUV(const uint8* rgbframe,
     convert_proc = &ConvertRGB32ToYUV_C;
 #else
     // For x86 processors, check if SSSE3 (or SSE2) is supported.
-    if (hasSSE2())
+    if (hasSSSE3())
+      convert_proc = &ConvertRGB32ToYUV_SSSE3;
+    else if (hasSSE2())
       convert_proc = &ConvertRGB32ToYUV_SSE2;
     else
       convert_proc = &ConvertRGB32ToYUV_C;
@@ -391,8 +320,21 @@ void ConvertRGB24ToYUV(const uint8* rgbframe,
                        int rgbstride,
                        int ystride,
                        int uvstride) {
+#if defined(ARCH_CPU_ARM_FAMILY)
   ConvertRGB24ToYUV_C(rgbframe, yplane, uplane, vplane, width, height,
                       rgbstride, ystride, uvstride);
+#else
+  static void (*convert_proc)(const uint8*, uint8*, uint8*, uint8*,
+                              int, int, int, int, int) = NULL;
+  if (!convert_proc) {
+    if (hasSSSE3())
+      convert_proc = &ConvertRGB24ToYUV_SSSE3;
+    else
+      convert_proc = &ConvertRGB24ToYUV_C;
+  }
+  convert_proc(rgbframe, yplane, uplane, vplane, width, height,
+               rgbstride, ystride, uvstride);
+#endif
 }
 
 void ConvertYUY2ToYUV(const uint8* src,
@@ -403,4 +345,34 @@ void ConvertYUY2ToYUV(const uint8* src,
                       int height) {
   ConvertYUY2ToYUV_C(src, yplane, uplane, vplane, width, height);
 }
+
+void ConvertYUVToRGB32(const uint8* yplane,
+                       const uint8* uplane,
+                       const uint8* vplane,
+                       uint8* rgbframe,
+                       int width,
+                       int height,
+                       int ystride,
+                       int uvstride,
+                       int rgbstride,
+                       YUVType yuv_type) {
+#if defined(ARCH_CPU_ARM_FAMILY)  // ARM builds always use the C version.
+  ConvertYUVToRGB32_C(yplane, uplane, vplane, rgbframe,
+                      width, height, ystride, uvstride, rgbstride, yuv_type);
+#else  // Non-ARM: prefer SSE, then MMX, then the C fallback.
+  static ConvertYUVToRGB32Proc convert_proc = NULL;  // Set on first call.
+  if (!convert_proc) {  // Unsynchronized lazy selection.
+    if (hasSSE())
+      convert_proc = &ConvertYUVToRGB32_SSE;
+    else if (hasMMX())
+      convert_proc = &ConvertYUVToRGB32_MMX;
+    else
+      convert_proc = &ConvertYUVToRGB32_C;
+  }
+  // All arguments are forwarded unchanged to the selected routine.
+  convert_proc(yplane, uplane, vplane, rgbframe,
+               width, height, ystride, uvstride, rgbstride, yuv_type);
+#endif
+}
+
 }  // namespace media
diff --git a/media/base/yuv_convert.h b/media/base/yuv_convert.h
index 20ba0aa..95b1780 100644
--- a/media/base/yuv_convert.h
+++ b/media/base/yuv_convert.h
@@ -94,6 +94,10 @@ void ConvertYUY2ToYUV(const uint8* src,
                       int width,
                       int height);
 
+// Empties the SIMD register state after calling optimized scaler functions.
+// Intended only for unit tests, which call it after invoking SIMD functions.
+void EmptyRegisterState();
+
 }  // namespace media
 
 #endif  // MEDIA_BASE_YUV_CONVERT_H_
diff --git a/media/base/yuv_convert_internal.h b/media/base/yuv_convert_internal.h
index 80776aa..7be14c4 100644
--- a/media/base/yuv_convert_internal.h
+++ b/media/base/yuv_convert_internal.h
@@ -13,15 +13,15 @@
 namespace media {
 
 // SSE2 version of converting RGBA to YV12.
-extern void ConvertRGB32ToYUV_SSE2(const uint8* rgbframe,
-                                   uint8* yplane,
-                                   uint8* uplane,
-                                   uint8* vplane,
-                                   int width,
-                                   int height,
-                                   int rgbstride,
-                                   int ystride,
-                                   int uvstride);
+void ConvertRGB32ToYUV_SSE2(const uint8* rgbframe,
+                            uint8* yplane,
+                            uint8* uplane,
+                            uint8* vplane,
+                            int width,
+                            int height,
+                            int rgbstride,
+                            int ystride,
+                            int uvstride);
 
 // This is a C reference implementation of the above routine.
 // This method should only be used in unit test.
diff --git a/media/base/yuv_convert_unittest.cc b/media/base/yuv_convert_unittest.cc
index 380897e..5de3b11 100644
--- a/media/base/yuv_convert_unittest.cc
+++ b/media/base/yuv_convert_unittest.cc
@@ -8,6 +8,7 @@
 #include "base/path_service.h"
 #include "media/base/cpu_features.h"
 #include "media/base/djb2.h"
+#include "media/base/simd/convert_yuv_to_rgb.h"
 #include "media/base/yuv_convert.h"
 #include "media/base/yuv_convert_internal.h"
 #include "media/base/yuv_row.h"
@@ -376,17 +377,7 @@ TEST(YUVConvertTest, RGB32ToYUV_SSE2_MatchReference) {
   scoped_array<uint8> yuv_converted_bytes(new uint8[kYUV12Size]);
   scoped_array<uint8> yuv_reference_bytes(new uint8[kYUV12Size]);
 
-  // Read YUV reference data from file.
-  FilePath yuv_url;
-  EXPECT_TRUE(PathService::Get(base::DIR_SOURCE_ROOT, &yuv_url));
-  yuv_url = yuv_url.Append(FILE_PATH_LITERAL("media"))
-                   .Append(FILE_PATH_LITERAL("test"))
-                   .Append(FILE_PATH_LITERAL("data"))
-                   .Append(FILE_PATH_LITERAL("bali_640x360_P420.yuv"));
-  EXPECT_EQ(static_cast<int>(kYUV12Size),
-            file_util::ReadFile(yuv_url,
-                                reinterpret_cast<char*>(yuv_bytes.get()),
-                                static_cast<int>(kYUV12Size)));
+  ReadYV12Data(&yuv_bytes);
 
   // Convert a frame of YUV to 32 bit ARGB.
   media::ConvertYUVToRGB32(
@@ -459,4 +450,241 @@ TEST(YUVConvertTest, RGB32ToYUV_SSE2_MatchReference) {
   // Make sure there's no difference from the reference.
   EXPECT_EQ(0, error);
 }
-#endif
+
+TEST(YUVConvertTest, ConvertYUVToRGB32Row_MMX) {
+  if (!media::hasMMX()) {
+    LOG(WARNING) << "System not supported. Test skipped.";
+    return;
+  }
+  // ConvertYUVToRGB32Row_MMX must match the C version byte for byte.
+  scoped_array<uint8> yuv_bytes(new uint8[kYUV12Size]);
+  scoped_array<uint8> rgb_bytes_reference(new uint8[kRGBSize]);
+  scoped_array<uint8> rgb_bytes_converted(new uint8[kRGBSize]);
+  ReadYV12Data(&yuv_bytes);
+
+  const int kWidth = 167;  // Width passed to both routines.
+  ConvertYUVToRGB32Row_C(yuv_bytes.get(),
+                         yuv_bytes.get() + kSourceUOffset,
+                         yuv_bytes.get() + kSourceVOffset,
+                         rgb_bytes_reference.get(),
+                         kWidth);
+  ConvertYUVToRGB32Row_MMX(yuv_bytes.get(),
+                           yuv_bytes.get() + kSourceUOffset,
+                           yuv_bytes.get() + kSourceVOffset,
+                           rgb_bytes_converted.get(),
+                           kWidth);
+  media::EmptyRegisterState();  // Clear SIMD state before comparing.
+  EXPECT_EQ(0, memcmp(rgb_bytes_reference.get(),
+                      rgb_bytes_converted.get(),
+                      kWidth * kBpp));
+}
+
+TEST(YUVConvertTest, ConvertYUVToRGB32Row_SSE) {
+  if (!media::hasSSE()) {
+    LOG(WARNING) << "System not supported. Test skipped.";
+    return;
+  }
+  // ConvertYUVToRGB32Row_SSE must match the C version byte for byte.
+  scoped_array<uint8> yuv_bytes(new uint8[kYUV12Size]);
+  scoped_array<uint8> rgb_bytes_reference(new uint8[kRGBSize]);
+  scoped_array<uint8> rgb_bytes_converted(new uint8[kRGBSize]);
+  ReadYV12Data(&yuv_bytes);
+
+  const int kWidth = 167;  // Width passed to both routines.
+  ConvertYUVToRGB32Row_C(yuv_bytes.get(),
+                         yuv_bytes.get() + kSourceUOffset,
+                         yuv_bytes.get() + kSourceVOffset,
+                         rgb_bytes_reference.get(),
+                         kWidth);
+  ConvertYUVToRGB32Row_SSE(yuv_bytes.get(),
+                           yuv_bytes.get() + kSourceUOffset,
+                           yuv_bytes.get() + kSourceVOffset,
+                           rgb_bytes_converted.get(),
+                           kWidth);
+  media::EmptyRegisterState();  // Clear SIMD state before comparing.
+  EXPECT_EQ(0, memcmp(rgb_bytes_reference.get(),
+                      rgb_bytes_converted.get(),
+                      kWidth * kBpp));
+}
+
+TEST(YUVConvertTest, ScaleYUVToRGB32Row_MMX) {
+  if (!media::hasMMX()) {
+    LOG(WARNING) << "System not supported. Test skipped.";
+    return;
+  }
+  // ScaleYUVToRGB32Row_MMX must match the C version byte for byte.
+  scoped_array<uint8> yuv_bytes(new uint8[kYUV12Size]);
+  scoped_array<uint8> rgb_bytes_reference(new uint8[kRGBSize]);
+  scoped_array<uint8> rgb_bytes_converted(new uint8[kRGBSize]);
+  ReadYV12Data(&yuv_bytes);
+
+  const int kWidth = 167;  // Width passed to both routines.
+  const int kSourceDx = 80000;  // This value means a scale down.
+  ScaleYUVToRGB32Row_C(yuv_bytes.get(),
+                       yuv_bytes.get() + kSourceUOffset,
+                       yuv_bytes.get() + kSourceVOffset,
+                       rgb_bytes_reference.get(),
+                       kWidth,
+                       kSourceDx);
+  ScaleYUVToRGB32Row_MMX(yuv_bytes.get(),
+                         yuv_bytes.get() + kSourceUOffset,
+                         yuv_bytes.get() + kSourceVOffset,
+                         rgb_bytes_converted.get(),
+                         kWidth,
+                         kSourceDx);
+  media::EmptyRegisterState();  // Clear SIMD state before comparing.
+  EXPECT_EQ(0, memcmp(rgb_bytes_reference.get(),
+                      rgb_bytes_converted.get(),
+                      kWidth * kBpp));
+}
+
+TEST(YUVConvertTest, ScaleYUVToRGB32Row_SSE) {
+  if (!media::hasSSE()) {
+    LOG(WARNING) << "System not supported. Test skipped.";
+    return;
+  }
+  // ScaleYUVToRGB32Row_SSE must match the C version byte for byte.
+  scoped_array<uint8> yuv_bytes(new uint8[kYUV12Size]);
+  scoped_array<uint8> rgb_bytes_reference(new uint8[kRGBSize]);
+  scoped_array<uint8> rgb_bytes_converted(new uint8[kRGBSize]);
+  ReadYV12Data(&yuv_bytes);
+
+  const int kWidth = 167;  // Width passed to both routines.
+  const int kSourceDx = 80000;  // This value means a scale down.
+  ScaleYUVToRGB32Row_C(yuv_bytes.get(),
+                       yuv_bytes.get() + kSourceUOffset,
+                       yuv_bytes.get() + kSourceVOffset,
+                       rgb_bytes_reference.get(),
+                       kWidth,
+                       kSourceDx);
+  ScaleYUVToRGB32Row_SSE(yuv_bytes.get(),
+                         yuv_bytes.get() + kSourceUOffset,
+                         yuv_bytes.get() + kSourceVOffset,
+                         rgb_bytes_converted.get(),
+                         kWidth,
+                         kSourceDx);
+  media::EmptyRegisterState();  // Clear SIMD state before comparing.
+  EXPECT_EQ(0, memcmp(rgb_bytes_reference.get(),
+                      rgb_bytes_converted.get(),
+                      kWidth * kBpp));
+}
+
+TEST(YUVConvertTest, LinearScaleYUVToRGB32Row_MMX) {
+  if (!media::hasMMX()) {
+    LOG(WARNING) << "System not supported. Test skipped.";
+    return;
+  }
+  // LinearScaleYUVToRGB32Row_MMX must match the C version byte for byte.
+  scoped_array<uint8> yuv_bytes(new uint8[kYUV12Size]);
+  scoped_array<uint8> rgb_bytes_reference(new uint8[kRGBSize]);
+  scoped_array<uint8> rgb_bytes_converted(new uint8[kRGBSize]);
+  ReadYV12Data(&yuv_bytes);
+
+  const int kWidth = 167;  // Width passed to both routines.
+  const int kSourceDx = 80000;  // This value means a scale down.
+  LinearScaleYUVToRGB32Row_C(yuv_bytes.get(),
+                             yuv_bytes.get() + kSourceUOffset,
+                             yuv_bytes.get() + kSourceVOffset,
+                             rgb_bytes_reference.get(),
+                             kWidth,
+                             kSourceDx);
+  LinearScaleYUVToRGB32Row_MMX(yuv_bytes.get(),
+                               yuv_bytes.get() + kSourceUOffset,
+                               yuv_bytes.get() + kSourceVOffset,
+                               rgb_bytes_converted.get(),
+                               kWidth,
+                               kSourceDx);
+  media::EmptyRegisterState();  // Clear SIMD state before comparing.
+  EXPECT_EQ(0, memcmp(rgb_bytes_reference.get(),
+                      rgb_bytes_converted.get(),
+                      kWidth * kBpp));
+}
+
+TEST(YUVConvertTest, LinearScaleYUVToRGB32Row_SSE) {
+  if (!media::hasSSE()) {
+    LOG(WARNING) << "System not supported. Test skipped.";
+    return;
+  }
+  // LinearScaleYUVToRGB32Row_SSE must match the C version byte for byte.
+  scoped_array<uint8> yuv_bytes(new uint8[kYUV12Size]);
+  scoped_array<uint8> rgb_bytes_reference(new uint8[kRGBSize]);
+  scoped_array<uint8> rgb_bytes_converted(new uint8[kRGBSize]);
+  ReadYV12Data(&yuv_bytes);
+
+  const int kWidth = 167;  // Width passed to both routines.
+  const int kSourceDx = 80000;  // This value means a scale down.
+  LinearScaleYUVToRGB32Row_C(yuv_bytes.get(),
+                             yuv_bytes.get() + kSourceUOffset,
+                             yuv_bytes.get() + kSourceVOffset,
+                             rgb_bytes_reference.get(),
+                             kWidth,
+                             kSourceDx);
+  LinearScaleYUVToRGB32Row_SSE(yuv_bytes.get(),
+                               yuv_bytes.get() + kSourceUOffset,
+                               yuv_bytes.get() + kSourceVOffset,
+                               rgb_bytes_converted.get(),
+                               kWidth,
+                               kSourceDx);
+  media::EmptyRegisterState();  // Clear SIMD state before comparing.
+  EXPECT_EQ(0, memcmp(rgb_bytes_reference.get(),
+                      rgb_bytes_converted.get(),
+                      kWidth * kBpp));
+}
+
+#if defined(ARCH_CPU_X86_64)
+
+TEST(YUVConvertTest, ScaleYUVToRGB32Row_SSE2_X64) {
+  scoped_array<uint8> yuv_bytes(new uint8[kYUV12Size]);
+  scoped_array<uint8> rgb_bytes_reference(new uint8[kRGBSize]);
+  scoped_array<uint8> rgb_bytes_converted(new uint8[kRGBSize]);
+  ReadYV12Data(&yuv_bytes);
+  // ScaleYUVToRGB32Row_SSE2_X64 must match C; no runtime CPU check here.
+  const int kWidth = 167;  // Width passed to both routines.
+  const int kSourceDx = 80000;  // This value means a scale down.
+  ScaleYUVToRGB32Row_C(yuv_bytes.get(),
+                       yuv_bytes.get() + kSourceUOffset,
+                       yuv_bytes.get() + kSourceVOffset,
+                       rgb_bytes_reference.get(),
+                       kWidth,
+                       kSourceDx);
+  ScaleYUVToRGB32Row_SSE2_X64(yuv_bytes.get(),
+                              yuv_bytes.get() + kSourceUOffset,
+                              yuv_bytes.get() + kSourceVOffset,
+                              rgb_bytes_converted.get(),
+                              kWidth,
+                              kSourceDx);
+  media::EmptyRegisterState();  // Clear SIMD state before comparing.
+  EXPECT_EQ(0, memcmp(rgb_bytes_reference.get(),
+                      rgb_bytes_converted.get(),
+                      kWidth * kBpp));
+}
+
+TEST(YUVConvertTest, LinearScaleYUVToRGB32Row_MMX_X64) {
+  scoped_array<uint8> yuv_bytes(new uint8[kYUV12Size]);
+  scoped_array<uint8> rgb_bytes_reference(new uint8[kRGBSize]);
+  scoped_array<uint8> rgb_bytes_converted(new uint8[kRGBSize]);
+  ReadYV12Data(&yuv_bytes);
+  // LinearScaleYUVToRGB32Row_MMX_X64 must match C; no runtime CPU check here.
+  const int kWidth = 167;  // Width passed to both routines.
+  const int kSourceDx = 80000;  // This value means a scale down.
+  LinearScaleYUVToRGB32Row_C(yuv_bytes.get(),
+                             yuv_bytes.get() + kSourceUOffset,
+                             yuv_bytes.get() + kSourceVOffset,
+                             rgb_bytes_reference.get(),
+                             kWidth,
+                             kSourceDx);
+  LinearScaleYUVToRGB32Row_MMX_X64(yuv_bytes.get(),
+                                   yuv_bytes.get() + kSourceUOffset,
+                                   yuv_bytes.get() + kSourceVOffset,
+                                   rgb_bytes_converted.get(),
+                                   kWidth,
+                                   kSourceDx);
+  media::EmptyRegisterState();  // Clear SIMD state before comparing.
+  EXPECT_EQ(0, memcmp(rgb_bytes_reference.get(),
+                      rgb_bytes_converted.get(),
+                      kWidth * kBpp));
+}
+
+#endif  // defined(ARCH_CPU_X86_64)
+
+#endif  // defined(ARCH_CPU_X86_FAMILY)
diff --git a/media/base/yuv_row_posix.cc b/media/base/yuv_row_posix.cc
index 2217f38..f839de8 100644
--- a/media/base/yuv_row_posix.cc
+++ b/media/base/yuv_row_posix.cc
@@ -920,4 +920,3 @@ void LinearScaleYUVToRGB32Row(const uint8* y_buf,
 
 #endif  // USE_MMX
 }  // extern "C"
-
diff --git a/media/media.gyp b/media/media.gyp
index 5282afb..6ecd6af 100644
--- a/media/media.gyp
+++ b/media/media.gyp
@@ -341,7 +341,12 @@
       'conditions': [
         [ 'target_arch == "ia32" or target_arch == "x64"', {
           'dependencies': [
-            'yuv_convert_sse2',
+            'yuv_convert_simd_x86',
+          ],
+        }],
+        [ 'target_arch == "arm"', {
+          'dependencies': [
+            'yuv_convert_simd_arm',
           ],
         }],
       ],
@@ -357,15 +362,45 @@
       ],
     },
     {
-      'target_name': 'yuv_convert_sse2',
+      'target_name': 'yuv_convert_simd_x86',
       'type': 'static_library',
       'include_dirs': [
         '..',
       ],
+      'sources': [
+        'base/yuv_convert_sse2.cc',
+        'base/simd/convert_rgb_to_yuv_x86.cc',
+        'base/simd/convert_rgb_to_yuv_ssse3.asm',
+        'base/simd/convert_rgb_to_yuv_ssse3.inc',
+        'base/simd/convert_yuv_to_rgb_c.cc',
+        'base/simd/convert_yuv_to_rgb_x86.cc',
+        'base/simd/convert_yuv_to_rgb_mmx.asm',
+        'base/simd/convert_yuv_to_rgb_mmx.inc',
+        'base/simd/convert_yuv_to_rgb_sse.asm',
+        'base/simd/filter_yuv.h',
+        'base/simd/filter_yuv_c.cc',
+        'base/simd/filter_yuv_mmx.cc',
+        'base/simd/filter_yuv_sse2.cc',
+        'base/simd/linear_scale_yuv_to_rgb_mmx.asm',
+        'base/simd/linear_scale_yuv_to_rgb_mmx.inc',
+        'base/simd/linear_scale_yuv_to_rgb_sse.asm',
+        'base/simd/scale_yuv_to_rgb_mmx.asm',
+        'base/simd/scale_yuv_to_rgb_mmx.inc',
+        'base/simd/scale_yuv_to_rgb_sse.asm',
+      ],
       'conditions': [
+        [ 'target_arch == "x64"', {
+          # Source files optimized for X64 systems.
+          'sources': [
+            'base/simd/linear_scale_yuv_to_rgb_mmx_x64.asm',
+            'base/simd/scale_yuv_to_rgb_sse2_x64.asm',
+          ],
+        }],
         [ 'os_posix == 1 and OS != "mac"', {
           'cflags': [
             '-msse2',
+            '-msse3',  # NOTE(review): these cflags sit in the same target as
+            '-mssse3',  # the *_c.cc fallback sources -- confirm that is safe.
           ],
         }],
         [ 'OS == "mac"', {
@@ -428,10 +463,6 @@
           },
         }],
       ],
-      'sources': [
-        'base/yuv_convert_sse2.cc',
-        'base/simd/convert_rgb_to_yuv.cc',
-      ],
       'variables': {
         'yasm_output_path': '<(SHARED_INTERMEDIATE_DIR)/media',
       },
@@ -440,6 +471,18 @@
       ],
     },
     {
+      'target_name': 'yuv_convert_simd_arm',
+      'type': 'static_library',
+      'include_dirs': [
+        '..',
+      ],
+      'sources': [
+        'base/simd/convert_yuv_to_rgb_c.cc',
+        'base/simd/filter_yuv.h',
+        'base/simd/filter_yuv_c.cc',
+      ],
+    },
+    {
       'target_name': 'ffmpeg_unittests',
       'type': 'executable',
       'dependencies': [