diff options
author | Wolfgang Wiedmeyer <wolfgit@wiedmeyer.de> | 2016-03-18 10:16:43 +0100 |
---|---|---|
committer | Wolfgang Wiedmeyer <wolfgit@wiedmeyer.de> | 2016-03-18 10:16:43 +0100 |
commit | 7f2bb91aecbdc8af268c56f80034d8f3dc0204aa (patch) | |
tree | 59506f762d56cb9a31856509496553707fcfd6a9 | |
parent | 6309ce85786ff96bd04f62dd5cb62a7b0c69f763 (diff) | |
parent | fb7e634f3d725728d65e0c32c5d20c99d91a158e (diff) | |
download | system_core-7f2bb91aecbdc8af268c56f80034d8f3dc0204aa.zip system_core-7f2bb91aecbdc8af268c56f80034d8f3dc0204aa.tar.gz system_core-7f2bb91aecbdc8af268c56f80034d8f3dc0204aa.tar.bz2 |
Merge branch 'cm-13.0' of https://github.com/CyanogenMod/android_system_core into replicant-6.0replicant-6.0-alpha-0004
47 files changed, 15736 insertions, 73 deletions
diff --git a/adb/adb.cpp b/adb/adb.cpp index 0ff9e3c..9f9e6bd 100644 --- a/adb/adb.cpp +++ b/adb/adb.cpp @@ -919,7 +919,7 @@ int handle_host_request(char *service, transport_type ttype, char* serial, int r if(!strncmp(service,"get-state",strlen("get-state"))) { transport = acquire_one_transport(CS_ANY, ttype, serial, NULL); SendOkay(reply_fd); - SendProtocolString(reply_fd, transport->connection_state_name()); + SendProtocolString(reply_fd, transport ? transport->connection_state_name() : "unknown"); return 0; } #endif // ADB_HOST diff --git a/adb/commandline.cpp b/adb/commandline.cpp index cd635ce..eb0c84b 100644 --- a/adb/commandline.cpp +++ b/adb/commandline.cpp @@ -756,8 +756,10 @@ static int logcat(transport_type transport, const char* serial, int argc, const static int mkdirs(const char *path) { + std::string holder(path); + int ret; - char *x = (char *)path + 1; + char *x = &holder[1]; for(;;) { x = adb_dirstart(x); @@ -774,7 +776,7 @@ static int mkdirs(const char *path) } static int backup(int argc, const char** argv) { - const char* filename = "./backup.ab"; + const char* filename = "backup.ab"; /* find, extract, and use any -f argument */ for (int i = 1; i < argc; i++) { diff --git a/fastboot/fastboot.cpp b/fastboot/fastboot.cpp index ec3b84a..5112c15 100644 --- a/fastboot/fastboot.cpp +++ b/fastboot/fastboot.cpp @@ -279,7 +279,7 @@ void usage(void) "usage: fastboot [ <option> ] <command>\n" "\n" "commands:\n" - " update <filename> reflash device from update.zip\n" + " update <filename> [ -a <boot.img> ] reflash device from update.zip\n" " flashall flash boot, system, vendor and if found,\n" " recovery\n" " flash <partition> [ <filename> ] write a file to a flash partition\n" @@ -329,6 +329,9 @@ void usage(void) " default: 2048\n" " -S <size>[K|M|G] automatically sparse files greater\n" " than size. 0 to disable\n" + " -R reboot device (e.g. 
after flash)\n" + " -a <boot.img> use alternate <boot.img> instead of\n" + " boot.img in update.zip file\n" ); } @@ -740,7 +743,7 @@ void do_update_signature(ZipArchiveHandle zip, char *fn) fb_queue_command("signature", "installing signature"); } -void do_update(usb_handle *usb, const char *filename, int erase_first) +void do_update(usb_handle *usb, const char *filename, int erase_first, const char *alt_boot_fname) { queue_info_dump(); @@ -763,18 +766,27 @@ void do_update(usb_handle *usb, const char *filename, int erase_first) setup_requirements(reinterpret_cast<char*>(data), sz); for (size_t i = 0; i < ARRAY_SIZE(images); ++i) { - int fd = unzip_to_file(zip, images[i].img_name); - if (fd == -1) { - if (images[i].is_optional) { - continue; + fastboot_buffer buf; + // support alt images only for boot partition + bool from_zip = alt_boot_fname == NULL || + strncmp(images[i].part_name, "boot", sizeof(images[i].part_name)); + if (from_zip) { + int fd = unzip_to_file(zip, images[i].img_name); + if (fd == -1) { + if (images[i].is_optional) { + continue; + } + CloseArchive(zip); + exit(1); // unzip_to_file already explained why. } - CloseArchive(zip); - exit(1); // unzip_to_file already explained why. 
+ int rc = load_buf_fd(usb, fd, &buf); + if (rc) die("cannot load %s from flash", images[i].img_name); + do_update_signature(zip, images[i].sig_name); + } else { + int rc = load_buf(usb, alt_boot_fname, &buf); + if (rc) die("cannot load %s", alt_boot_fname); } - fastboot_buffer buf; - int rc = load_buf_fd(usb, fd, &buf); - if (rc) die("cannot load %s from flash", images[i].img_name); - do_update_signature(zip, images[i].sig_name); + if (erase_first && needs_erase(usb, images[i].part_name)) { fb_queue_erase(images[i].part_name); } @@ -1017,8 +1029,10 @@ int main(int argc, char **argv) int status; int c; int longindex; + const char *alt_boot_fname = NULL; const struct option longopts[] = { + {"alt-boot", required_argument, 0, 'a'}, {"base", required_argument, 0, 'b'}, {"kernel_offset", required_argument, 0, 'k'}, {"page_size", required_argument, 0, 'n'}, @@ -1027,18 +1041,22 @@ int main(int argc, char **argv) {"help", no_argument, 0, 'h'}, {"unbuffered", no_argument, 0, 0}, {"version", no_argument, 0, 0}, + {"reboot", no_argument, 0, 'R'}, {0, 0, 0, 0} }; serial = getenv("ANDROID_SERIAL"); while (1) { - c = getopt_long(argc, argv, "wub:k:n:r:t:s:S:lp:c:i:m:h", longopts, &longindex); + c = getopt_long(argc, argv, "wub:k:n:r:t:s:S:lp:c:i:m:hRa:", longopts, &longindex); if (c < 0) { break; } /* Alphabetical cases */ switch (c) { + case 'a': + alt_boot_fname = optarg; + break; case 'b': base_addr = strtoul(optarg, 0, 16); break; @@ -1074,6 +1092,9 @@ int main(int argc, char **argv) case 'r': ramdisk_offset = strtoul(optarg, 0, 16); break; + case 'R': + wants_reboot = 1; + break; case 't': tags_offset = strtoul(optarg, 0, 16); break; @@ -1251,10 +1272,10 @@ int main(int argc, char **argv) wants_reboot = 1; } else if(!strcmp(*argv, "update")) { if (argc > 1) { - do_update(usb, argv[1], erase_first); + do_update(usb, argv[1], erase_first, alt_boot_fname); skip(2); } else { - do_update(usb, "update.zip", erase_first); + do_update(usb, "update.zip", erase_first, 
alt_boot_fname); skip(1); } wants_reboot = 1; diff --git a/fs_mgr/fs_mgr_format.c b/fs_mgr/fs_mgr_format.c index e932990..8bda19c 100644 --- a/fs_mgr/fs_mgr_format.c +++ b/fs_mgr/fs_mgr_format.c @@ -33,7 +33,7 @@ extern void reset_ext4fs_info(); static int format_ext4(char *fs_blkdev, char *fs_mnt_point, long long fs_length) { - unsigned int nr_sec; + uint64_t dev_sz; int fd, rc = 0; if ((fd = open(fs_blkdev, O_WRONLY, 0644)) < 0) { @@ -41,7 +41,7 @@ static int format_ext4(char *fs_blkdev, char *fs_mnt_point, long long fs_length) return -1; } - if ((ioctl(fd, BLKGETSIZE, &nr_sec)) == -1) { + if ((ioctl(fd, BLKGETSIZE64, &dev_sz)) == -1) { ERROR("Cannot get block device size. %s\n", strerror(errno)); close(fd); return -1; @@ -49,7 +49,7 @@ static int format_ext4(char *fs_blkdev, char *fs_mnt_point, long long fs_length) /* Format the partition using the calculated length */ reset_ext4fs_info(); - info.len = ((off64_t)nr_sec * 512); + info.len = (off64_t)dev_sz; if (fs_length > 0) { info.len = fs_length; diff --git a/healthd/Android.mk b/healthd/Android.mk index e5fffc0..d1e005e 100644 --- a/healthd/Android.mk +++ b/healthd/Android.mk @@ -9,6 +9,12 @@ LOCAL_CFLAGS := -Werror include $(BUILD_STATIC_LIBRARY) include $(CLEAR_VARS) +LOCAL_SRC_FILES := healthd_board_msm.cpp +LOCAL_MODULE := libhealthd.msm +LOCAL_CFLAGS := -Werror +include $(BUILD_STATIC_LIBRARY) + +include $(CLEAR_VARS) LOCAL_SRC_FILES := \ healthd.cpp \ @@ -55,6 +61,10 @@ endif LOCAL_HAL_STATIC_LIBRARIES := libhealthd +ifeq ($(BOARD_USES_QCOM_HARDWARE),true) +BOARD_HAL_STATIC_LIBRARIES ?= libhealthd.msm +endif + # Symlink /charger to /sbin/healthd LOCAL_POST_INSTALL_CMD := $(hide) mkdir -p $(TARGET_ROOT_OUT) \ && rm -f $(TARGET_ROOT_OUT)/charger && ln -sf /sbin/healthd $(TARGET_ROOT_OUT)/charger diff --git a/healthd/healthd_board_msm.cpp b/healthd/healthd_board_msm.cpp new file mode 100644 index 0000000..d17095f --- /dev/null +++ b/healthd/healthd_board_msm.cpp @@ -0,0 +1,275 @@ +/* + * Copyright (C) 2014 
The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include <cutils/android_reboot.h> +#include <cutils/klog.h> +#include <cutils/misc.h> +#include <cutils/uevent.h> +#include <cutils/properties.h> + +#include <pthread.h> +#include <linux/android_alarm.h> +#include <sys/timerfd.h> +#include <linux/rtc.h> + +#include <healthd.h> + +#define LOGE(x...) do { KLOG_ERROR("charger", x); } while (0) +#define LOGI(x...) do { KLOG_INFO("charger", x); } while (0) +#define LOGV(x...) 
do { KLOG_DEBUG("charger", x); } while (0) + +enum alarm_time_type { + ALARM_TIME, + RTC_TIME, +}; + +/* + * shouldn't be changed after + * reading from alarm register + */ +static time_t alm_secs; + +static int alarm_get_time(enum alarm_time_type time_type, + time_t *secs) +{ + struct tm tm; + unsigned int cmd; + int rc, fd = -1; + + if (!secs) + return -1; + + fd = open("/dev/rtc0", O_RDONLY); + if (fd < 0) { + LOGE("Can't open rtc devfs node\n"); + return -1; + } + + switch (time_type) { + case ALARM_TIME: + cmd = RTC_ALM_READ; + break; + case RTC_TIME: + cmd = RTC_RD_TIME; + break; + default: + LOGE("Invalid time type\n"); + goto err; + } + + rc = ioctl(fd, cmd, &tm); + if (rc < 0) { + LOGE("Unable to get time\n"); + goto err; + } + + *secs = mktime(&tm) + tm.tm_gmtoff; + if (*secs < 0) { + LOGE("Invalid seconds = %ld\n", *secs); + goto err; + } + + close(fd); + return 0; + +err: + close(fd); + return -1; +} + +#define ERR_SECS 2 +static int alarm_is_alm_expired() +{ + int rc; + time_t rtc_secs; + + rc = alarm_get_time(RTC_TIME, &rtc_secs); + if (rc < 0) + return 0; + + return (alm_secs >= rtc_secs - ERR_SECS && + alm_secs <= rtc_secs + ERR_SECS) ? 
1 : 0; +} + +static int timerfd_set_reboot_time_and_wait(time_t secs) +{ + int fd; + int ret = -1; + fd = timerfd_create(CLOCK_REALTIME_ALARM, 0); + if (fd < 0) { + LOGE("Can't open timerfd alarm node\n"); + goto err_return; + } + + struct itimerspec spec; + memset(&spec, 0, sizeof(spec)); + spec.it_value.tv_sec = secs; + + if (timerfd_settime(fd, 0 /* relative */, &spec, NULL)) { + LOGE("Can't set timerfd alarm\n"); + goto err_close; + } + + uint64_t unused; + if (read(fd, &unused, sizeof(unused)) < 0) { + LOGE("Wait alarm error\n"); + goto err_close; + } + + ret = 0; +err_close: + close(fd); +err_return: + return ret; +} + +static int alarm_set_reboot_time_and_wait(time_t secs) +{ + int rc, fd; + struct timespec ts; + + fd = open("/dev/alarm", O_RDWR); + if (fd < 0) { + LOGE("Can't open alarm devfs node, trying timerfd\n"); + return timerfd_set_reboot_time_and_wait(secs); + } + + /* get the elapsed realtime from boot time to now */ + rc = ioctl(fd, ANDROID_ALARM_GET_TIME( + ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP), &ts); + if (rc < 0) { + LOGE("Unable to get elapsed realtime\n"); + goto err; + } + + /* calculate the elapsed time from boot time to reboot time */ + ts.tv_sec += secs; + ts.tv_nsec = 0; + + rc = ioctl(fd, ANDROID_ALARM_SET( + ANDROID_ALARM_ELAPSED_REALTIME_WAKEUP), &ts); + if (rc < 0) { + LOGE("Unable to set reboot time to %ld\n", secs); + goto err; + } + + do { + rc = ioctl(fd, ANDROID_ALARM_WAIT); + } while ((rc < 0 && errno == EINTR) || !alarm_is_alm_expired()); + + if (rc <= 0) { + LOGE("Unable to wait on alarm\n"); + goto err; + } + + close(fd); + return 0; + +err: + if (fd >= 0) + close(fd); + return -1; +} + +static void *alarm_thread(void *) +{ + time_t rtc_secs, rb_secs; + int rc; + + /* + * to support power off alarm, the time + * stored in alarm register at latest + * shutdown time should be some time + * earlier than the actual alarm time + * set by user + */ + rc = alarm_get_time(ALARM_TIME, &alm_secs); + if (rc < 0 || !alm_secs) + goto 
err; + + rc = alarm_get_time(RTC_TIME, &rtc_secs); + if (rc < 0) + goto err; + + /* + * calculate the reboot time after which + * the phone will reboot + */ + rb_secs = alm_secs - rtc_secs; + if (rb_secs <= 0) + goto err; + + rc = alarm_set_reboot_time_and_wait(rb_secs); + if (rc < 0) + goto err; + + LOGI("Exit from power off charging, reboot the phone!\n"); + android_reboot(ANDROID_RB_RESTART, 0, 0); + +err: + LOGE("Exit from alarm thread\n"); + return NULL; +} + +void healthd_board_init(struct healthd_config*) +{ + pthread_t tid; + int rc; + char value[PROP_VALUE_MAX]; + + property_get("ro.bootmode", value, ""); + if (!strcmp("charger", value)) { + rc = pthread_create(&tid, NULL, alarm_thread, NULL); + if (rc < 0) + LOGE("Create alarm thread failed\n"); + } +} + +int healthd_board_battery_update(struct android::BatteryProperties*) +{ + // return 0 to log periodic polled battery status to kernel log + return 1; +} + +void healthd_board_mode_charger_draw_battery(struct android::BatteryProperties*) +{ + +} + +void healthd_board_mode_charger_battery_update(struct android::BatteryProperties*) +{ + +} + +void healthd_board_mode_charger_set_backlight(bool) +{ + +} + +void healthd_board_mode_charger_init() +{ + +} diff --git a/healthd/healthd_mode_charger.cpp b/healthd/healthd_mode_charger.cpp index 1b6e216..eb93d6a 100644 --- a/healthd/healthd_mode_charger.cpp +++ b/healthd/healthd_mode_charger.cpp @@ -156,7 +156,7 @@ static struct frame batt_anim_frames[] = { { .disp_time = 750, .min_capacity = 80, - .level_only = true, + .level_only = false, .surface = NULL, }, { diff --git a/init/Android.mk b/init/Android.mk index 159c4f2..aa32236 100644 --- a/init/Android.mk +++ b/init/Android.mk @@ -85,7 +85,6 @@ LOCAL_STATIC_LIBRARIES := \ liblogwrap \ libcutils \ libbase \ - libext4_utils_static \ libutils \ liblog \ libc \ diff --git a/init/builtins.cpp b/init/builtins.cpp index e3e64f4..b290ce3 100644 --- a/init/builtins.cpp +++ b/init/builtins.cpp @@ -585,6 +585,9 @@ int 
do_powerctl(int nargs, char **args) } if (strncmp(command, "shutdown", 8) == 0) { + if (property_get_bool("init.shutdown_to_charging", false)) { + return android_reboot(ANDROID_RB_RESTART2, 0, "charging"); + } cmd = ANDROID_RB_POWEROFF; len = 8; } else if (strncmp(command, "reboot", 6) == 0) { diff --git a/init/property_service.cpp b/init/property_service.cpp index 11ff06b..fe82bef 100644 --- a/init/property_service.cpp +++ b/init/property_service.cpp @@ -150,6 +150,33 @@ int __property_get(const char *name, char *value) return __system_property_get(name, value); } +bool property_get_bool(const char *key, bool default_value) { + if (!key) { + return default_value; + } + + bool result = default_value; + char buf[PROP_VALUE_MAX] = {'\0',}; + + int len = __property_get(key, buf); + if (len == 1) { + char ch = buf[0]; + if (ch == '0' || ch == 'n') { + result = false; + } else if (ch == '1' || ch == 'y') { + result = true; + } + } else if (len > 1) { + if (!strcmp(buf, "no") || !strcmp(buf, "false") || !strcmp(buf, "off")) { + result = false; + } else if (!strcmp(buf, "yes") || !strcmp(buf, "true") || !strcmp(buf, "on")) { + result = true; + } + } + + return result; +} + static void write_persistent_property(const char *name, const char *value) { char tempPath[PATH_MAX]; diff --git a/init/property_service.h b/init/property_service.h index 303f251..6b542b5 100644 --- a/init/property_service.h +++ b/init/property_service.h @@ -28,6 +28,7 @@ extern void start_property_service(void); void get_property_workspace(int *fd, int *sz); extern int __property_get(const char *name, char *value); extern int property_set(const char *name, const char *value); +extern bool property_get_bool(const char *name, bool def_value); extern bool properties_initialized(); #ifndef __clang__ diff --git a/libcutils/sched_policy.c b/libcutils/sched_policy.c index 83222f4..70dc8c4 100644 --- a/libcutils/sched_policy.c +++ b/libcutils/sched_policy.c @@ -61,6 +61,7 @@ static int bg_cgroup_fd = -1; 
static int fg_cgroup_fd = -1; // File descriptors open to /dev/cpuset/../tasks, setup by initialize, or -1 on error +static int system_bg_cpuset_fd = -1; static int bg_cpuset_fd = -1; static int fg_cpuset_fd = -1; @@ -126,6 +127,8 @@ static void __initialize(void) { fg_cpuset_fd = open(filename, O_WRONLY | O_CLOEXEC); filename = "/dev/cpuset/background/tasks"; bg_cpuset_fd = open(filename, O_WRONLY | O_CLOEXEC); + filename = "/dev/cpuset/system-background/tasks"; + system_bg_cpuset_fd = open(filename, O_WRONLY | O_CLOEXEC); } #endif @@ -260,6 +263,9 @@ int set_cpuset_policy(int tid, SchedPolicy policy) case SP_AUDIO_SYS: fd = fg_cpuset_fd; break; + case SP_SYSTEM: + fd = system_bg_cpuset_fd; + break; default: fd = -1; break; diff --git a/liblog/Android.mk b/liblog/Android.mk index 115dd79..6714498 100644 --- a/liblog/Android.mk +++ b/liblog/Android.mk @@ -25,11 +25,13 @@ include $(CLEAR_VARS) liblog_cflags := -DLIBLOG_LOG_TAG=1005 ifneq ($(TARGET_USES_LOGD),false) -liblog_sources := logd_write.c log_event_write.c +liblog_sources := logd_write.c else liblog_sources := logd_write_kern.c endif +liblog_sources += log_event_write.c + # some files must not be compiled when building against Mingw # they correspond to features not used by our host development tools # which are also hard or even impossible to port to native Win32 diff --git a/libpixelflinger/Android.mk b/libpixelflinger/Android.mk index f02da7f..e6c9094 100644 --- a/libpixelflinger/Android.mk +++ b/libpixelflinger/Android.mk @@ -7,9 +7,16 @@ include $(CLEAR_VARS) include $(CLEAR_VARS) PIXELFLINGER_SRC_FILES:= \ + codeflinger/CodeCache.cpp \ + format.cpp \ + clear.cpp \ + raster.cpp \ + buffer.cpp + +ifeq ($(filter x86%,$(TARGET_ARCH)),) +PIXELFLINGER_SRC_FILES += \ codeflinger/ARMAssemblerInterface.cpp \ codeflinger/ARMAssemblerProxy.cpp \ - codeflinger/CodeCache.cpp \ codeflinger/GGLAssembler.cpp \ codeflinger/load_store.cpp \ codeflinger/blending.cpp \ @@ -19,10 +26,8 @@ PIXELFLINGER_SRC_FILES:= \ 
pixelflinger.cpp.arm \ trap.cpp.arm \ scanline.cpp.arm \ - format.cpp \ - clear.cpp \ - raster.cpp \ - buffer.cpp + +endif PIXELFLINGER_CFLAGS := -fstrict-aliasing -fomit-frame-pointer @@ -43,6 +48,18 @@ PIXELFLINGER_SRC_FILES_arm64 := \ arch-arm64/col32cb16blend.S \ arch-arm64/t32cb16blend.S \ +PIXELFLINGER_SRC_FILES_x86 := \ + codeflinger/x86/X86Assembler.cpp \ + codeflinger/x86/GGLX86Assembler.cpp \ + codeflinger/x86/load_store.cpp \ + codeflinger/x86/blending.cpp \ + codeflinger/x86/texturing.cpp \ + fixed.cpp \ + picker.cpp \ + pixelflinger.cpp \ + trap.cpp \ + scanline.cpp + ifndef ARCH_MIPS_REV6 PIXELFLINGER_SRC_FILES_mips := \ codeflinger/MIPSAssembler.cpp \ @@ -58,12 +75,16 @@ LOCAL_MODULE:= libpixelflinger LOCAL_SRC_FILES := $(PIXELFLINGER_SRC_FILES) LOCAL_SRC_FILES_arm := $(PIXELFLINGER_SRC_FILES_arm) LOCAL_SRC_FILES_arm64 := $(PIXELFLINGER_SRC_FILES_arm64) +LOCAL_SRC_FILES_x86 := $(PIXELFLINGER_SRC_FILES_x86) +LOCAL_SRC_FILES_x86_64 := $(PIXELFLINGER_SRC_FILES_x86) LOCAL_SRC_FILES_mips := $(PIXELFLINGER_SRC_FILES_mips) LOCAL_CFLAGS := $(PIXELFLINGER_CFLAGS) LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)/include LOCAL_C_INCLUDES += $(LOCAL_EXPORT_C_INCLUDE_DIRS) \ external/safe-iop/include LOCAL_SHARED_LIBRARIES := libcutils liblog libutils +LOCAL_WHOLE_STATIC_LIBRARIES_x86 := libenc +LOCAL_WHOLE_STATIC_LIBRARIES_x86_64 := libenc # Really this should go away entirely or at least not depend on # libhardware, but this at least gets us built. 
diff --git a/libpixelflinger/codeflinger/Android.mk b/libpixelflinger/codeflinger/Android.mk new file mode 100644 index 0000000..8004af7 --- /dev/null +++ b/libpixelflinger/codeflinger/Android.mk @@ -0,0 +1,3 @@ +ifneq ($(filter x86%,$(TARGET_ARCH)),) +include $(call all-named-subdir-makefiles,x86/libenc) +endif diff --git a/libpixelflinger/codeflinger/x86/GGLX86Assembler.cpp b/libpixelflinger/codeflinger/x86/GGLX86Assembler.cpp new file mode 100644 index 0000000..1b24503 --- /dev/null +++ b/libpixelflinger/codeflinger/x86/GGLX86Assembler.cpp @@ -0,0 +1,1507 @@ +/* libs/pixelflinger/codeflinger/x86/GGLX86Assembler.cpp +** +** Copyright 2006, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. 
+*/ + +#define LOG_TAG "GGLX86Assembler" + +#include <assert.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/types.h> +#include <cutils/log.h> + +#include "codeflinger/x86/GGLX86Assembler.h" + +namespace android { + +// ---------------------------------------------------------------------------- + +GGLX86Assembler::GGLX86Assembler(const sp<Assembly>& assembly) + : X86Assembler(assembly), X86RegisterAllocator(), mOptLevel(7) +{ +} + +GGLX86Assembler::~GGLX86Assembler() +{ +} + +void GGLX86Assembler::reset(int opt_level) +{ + X86Assembler::reset(); + X86RegisterAllocator::reset(); + mOptLevel = opt_level; +} + +// --------------------------------------------------------------------------- + +int GGLX86Assembler::scanline(const needs_t& needs, context_t const* c) +{ + int err = 0; + err = scanline_core(needs, c); + if (err != 0) + ALOGE("scanline_core failed probably due to running out of the registers: %d\n", err); + + // XXX: in theory, pcForLabel is not valid before generate() + char* fragment_start_pc = pcForLabel("fragment_loop"); + char* fragment_end_pc = pcForLabel("fragment_end"); + const int per_fragment_ins_size = int(fragment_end_pc - fragment_start_pc); + + // build a name for our pipeline + char name[128]; + sprintf(name, + "scanline__%08X:%08X_%08X_%08X [%3d ipp ins size]", + needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ins_size); + + if (err) { + ALOGE("Error while generating ""%s""\n", name); + disassemble(name); + return -1; + } + + return generate(name); +} + +int GGLX86Assembler::scanline_core(const needs_t& needs, context_t const* c) +{ + int64_t duration = ggl_system_time(); + + mBlendFactorCached = 0; + mBlending = 0; + mMasking = 0; + mAA = GGL_READ_NEEDS(P_AA, needs.p); + mDithering = GGL_READ_NEEDS(P_DITHER, needs.p); + mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER; + mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER; + mFog = GGL_READ_NEEDS(P_FOG, needs.p) != 0; + mSmooth 
= GGL_READ_NEEDS(SHADE, needs.n) != 0; + mBuilderContext.needs = needs; + mBuilderContext.c = c; + mBuilderContext.Rctx = obtainReg(); //dynamically obtain if used and then immediately recycle it if not used + mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ]; + + // ------------------------------------------------------------------------ + + decodeLogicOpNeeds(needs); + + decodeTMUNeeds(needs, c); + + mBlendSrc = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n)); + mBlendDst = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n)); + mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n)); + mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n)); + + if (!mCbFormat.c[GGLFormat::ALPHA].h) { + if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) || + (mBlendSrc == GGL_DST_ALPHA)) { + mBlendSrc = GGL_ONE; + } + if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) || + (mBlendSrcA == GGL_DST_ALPHA)) { + mBlendSrcA = GGL_ONE; + } + if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) || + (mBlendDst == GGL_DST_ALPHA)) { + mBlendDst = GGL_ONE; + } + if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) || + (mBlendDstA == GGL_DST_ALPHA)) { + mBlendDstA = GGL_ONE; + } + } + + // if we need the framebuffer, read it now + const int blending = blending_codes(mBlendSrc, mBlendDst) | + blending_codes(mBlendSrcA, mBlendDstA); + + // XXX: handle special cases, destination not modified... + if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) && + (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) { + // Destination unmodified (beware of logic ops) + } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) && + (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) { + // Destination is zero (beware of logic ops) + } + + int fbComponents = 0; + const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n); + for (int i=0 ; i<4 ; i++) { + const int mask = 1<<i; + component_info_t& info = mInfo[i]; + int fs = i==GGLFormat::ALPHA ? 
mBlendSrcA : mBlendSrc; + int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst; + if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA) + fs = GGL_ONE; + info.masked = !!(masking & mask); + info.inDest = !info.masked && mCbFormat.c[i].h && + ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp)); + if (mCbFormat.components >= GGL_LUMINANCE && + (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) { + info.inDest = false; + } + info.needed = (i==GGLFormat::ALPHA) && + (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS); + info.replaced = !!(mTextureMachine.replaced & mask); + info.iterated = (!info.replaced && (info.inDest || info.needed)); + info.smooth = mSmooth && info.iterated; + info.fog = mFog && info.inDest && (i != GGLFormat::ALPHA); + info.blend = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO)); + + mBlending |= (info.blend ? mask : 0); + mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0; + fbComponents |= mCbFormat.c[i].h ? mask : 0; + } + + mAllMasked = (mMasking == fbComponents); + if (mAllMasked) { + mDithering = 0; + } + + fragment_parts_t parts; + + // ------------------------------------------------------------------------ + callee_work(); + // ------------------------------------------------------------------------ + + mCurSp = -12; // %ebx, %edi, %esi + prepare_esp(0); + build_scanline_preparation(parts, needs); + recycleReg(mBuilderContext.Rctx); + + if (registerFile().status()) + return registerFile().status(); + + // ------------------------------------------------------------------------ + label("fragment_loop"); + // ------------------------------------------------------------------------ + { + Scratch regs(registerFile()); + int temp_reg = -1; + + if (mDithering) { + // update the dither index. 
+ temp_reg = regs.obtain(); + //To load to register and calculate should be fast than the memory operations + MOV_MEM_TO_REG(parts.count.offset_ebp, PhysicalReg_EBP, temp_reg); + ROR(GGL_DITHER_ORDER_SHIFT, temp_reg); + ADD_IMM_TO_REG(1 << (32 - GGL_DITHER_ORDER_SHIFT), temp_reg); + ROR(32 - GGL_DITHER_ORDER_SHIFT, temp_reg); + MOV_REG_TO_MEM(temp_reg, parts.count.offset_ebp, PhysicalReg_EBP); + regs.recycle(temp_reg); + + } + + // XXX: could we do an early alpha-test here in some cases? + // It would probaly be used only with smooth-alpha and no texture + // (or no alpha component in the texture). + + // Early z-test + if (mAlphaTest==GGL_ALWAYS) { + build_depth_test(parts, Z_TEST|Z_WRITE); + } else { + // we cannot do the z-write here, because + // it might be killed by the alpha-test later + build_depth_test(parts, Z_TEST); + } + + { // texture coordinates + Scratch scratches(registerFile()); + + // texel generation + build_textures(parts, regs); + + } + + if ((blending & (FACTOR_DST|BLEND_DST)) || + (mMasking && !mAllMasked) || + (mLogicOp & LOGIC_OP_DST)) + { + // blending / logic_op / masking need the framebuffer + mDstPixel.setTo(regs.obtain(), &mCbFormat); + + // load the framebuffer pixel + comment("fetch color-buffer"); + parts.cbPtr.reg = regs.obtain(); + MOV_MEM_TO_REG(parts.cbPtr.offset_ebp, PhysicalReg_EBP, parts.cbPtr.reg); + load(parts.cbPtr, mDstPixel); + mCurSp = mCurSp - 4; + mDstPixel.offset_ebp = mCurSp; + MOV_REG_TO_MEM(mDstPixel.reg, mDstPixel.offset_ebp, EBP); + regs.recycle(mDstPixel.reg); + regs.recycle(parts.cbPtr.reg); + mDstPixel.reg = -1; + } + + if (registerFile().status()) + return registerFile().status(); + + pixel_t pixel; + int directTex = mTextureMachine.directTexture; + if (directTex | parts.packed) { + // note: we can't have both here + // iterated color or direct texture + if(directTex) { + pixel.offset_ebp = parts.texel[directTex-1].offset_ebp; + } + else + pixel.offset_ebp = parts.iterated.offset_ebp; + pixel.reg = 
regs.obtain(); + MOV_MEM_TO_REG(pixel.offset_ebp, EBP, pixel.reg); + //pixel = directTex ? parts.texel[directTex-1] : parts.iterated; + pixel.flags &= ~CORRUPTIBLE; + } else { + if (mDithering) { + mBuilderContext.Rctx = regs.obtain(); + temp_reg = regs.obtain(); + const int ctxtReg = mBuilderContext.Rctx; + MOV_MEM_TO_REG(8, EBP, ctxtReg); + const int mask = GGL_DITHER_SIZE-1; + parts.dither = reg_t(regs.obtain()); + MOV_MEM_TO_REG(parts.count.offset_ebp, EBP, parts.dither.reg); + AND_IMM_TO_REG(mask, parts.dither.reg); + ADD_REG_TO_REG(ctxtReg, parts.dither.reg); + MOVZX_MEM_TO_REG(OpndSize_8, parts.dither.reg, GGL_OFFSETOF(ditherMatrix), temp_reg); + MOV_REG_TO_REG(temp_reg, parts.dither.reg); + mCurSp = mCurSp - 4; + parts.dither.offset_ebp = mCurSp; + MOV_REG_TO_MEM(parts.dither.reg, parts.dither.offset_ebp, EBP); + regs.recycle(parts.dither.reg); + regs.recycle(temp_reg); + regs.recycle(mBuilderContext.Rctx); + + } + + // allocate a register for the resulting pixel + pixel.setTo(regs.obtain(), &mCbFormat, FIRST); + + build_component(pixel, parts, GGLFormat::ALPHA, regs); + + if (mAlphaTest!=GGL_ALWAYS) { + // only handle the z-write part here. We know z-test + // was successful, as well as alpha-test. + build_depth_test(parts, Z_WRITE); + } + + build_component(pixel, parts, GGLFormat::RED, regs); + build_component(pixel, parts, GGLFormat::GREEN, regs); + build_component(pixel, parts, GGLFormat::BLUE, regs); + + pixel.flags |= CORRUPTIBLE; + } + + if (registerFile().status()) { + return registerFile().status(); + } + + if (pixel.reg == -1) { + // be defensive here. if we're here it's probably + // that this whole fragment is a no-op. 
+ pixel = mDstPixel; + } + + if (!mAllMasked) { + // logic operation + build_logic_op(pixel, regs); + + // masking + build_masking(pixel, regs); + + comment("store"); + parts.cbPtr.reg = regs.obtain(); + MOV_MEM_TO_REG(parts.cbPtr.offset_ebp, EBP, parts.cbPtr.reg); + store(parts.cbPtr, pixel, WRITE_BACK); + MOV_REG_TO_MEM(parts.cbPtr.reg, parts.cbPtr.offset_ebp, EBP); + regs.recycle(parts.cbPtr.reg); + regs.recycle(pixel.reg); + } + } + + if (registerFile().status()) + return registerFile().status(); + + // update the iterated color... + if (parts.reload != 3) { + build_smooth_shade(parts); + } + + // update iterated z + build_iterate_z(parts); + + // update iterated fog + build_iterate_f(parts); + + //SUB_IMM_TO_REG(1<<16, parts.count.reg); + SUB_IMM_TO_MEM(1<<16, parts.count.offset_ebp, EBP); + + JCC(Mnemonic_JNS, "fragment_loop"); + label("fragment_end"); + int update_esp_offset, shrink_esp_offset; + update_esp_offset = shrink_esp_offset = -mCurSp - 12; // 12 is ebx, esi, edi + update_esp(update_esp_offset); + shrink_esp(shrink_esp_offset); + return_work(); + + if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) { + if (mDepthTest!=GGL_ALWAYS) { + label("discard_before_textures"); + build_iterate_texture_coordinates(parts); + } + label("discard_after_textures"); + build_smooth_shade(parts); + build_iterate_z(parts); + build_iterate_f(parts); + if (!mAllMasked) { + //ADD_IMM_TO_REG(parts.cbPtr.size>>3, parts.cbPtr.reg); + ADD_IMM_TO_MEM(parts.cbPtr.size>>3, parts.cbPtr.offset_ebp, EBP); + } + SUB_IMM_TO_MEM(1<<16, parts.count.offset_ebp, EBP); + //SUB_IMM_TO_REG(1<<16, parts.count.reg); + JCC(Mnemonic_JNS, "fragment_loop"); + update_esp_offset = shrink_esp_offset = -mCurSp - 12; // 12 is ebx, esi, edi + update_esp(update_esp_offset); + shrink_esp(shrink_esp_offset); + return_work(); + } + + return registerFile().status(); +} + +// --------------------------------------------------------------------------- + +void GGLX86Assembler::build_scanline_preparation( 
+ fragment_parts_t& parts, const needs_t& needs) +{ + Scratch scratches(registerFile()); + + // compute count + comment("compute ct (# of pixels to process)"); + int temp_reg; + parts.count.setTo(obtainReg()); + int Rx = scratches.obtain(); + int Ry = scratches.obtain(); + // the only argument is +8 bytes relative to the current EBP + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); + CONTEXT_LOAD(Rx, iterators.xl); + CONTEXT_LOAD(parts.count.reg, iterators.xr); + CONTEXT_LOAD(Ry, iterators.y); + + // parts.count = iterators.xr - Rx + SUB_REG_TO_REG(Rx, parts.count.reg); + SUB_IMM_TO_REG(1, parts.count.reg); + + if (mDithering) { + // parts.count.reg = 0xNNNNXXDD + // NNNN = count-1 + // DD = dither offset + // XX = 0xxxxxxx (x = garbage) + Scratch scratches(registerFile()); + int tx = scratches.obtain(); + int ty = scratches.obtain(); + + MOV_REG_TO_REG(Rx,tx); + AND_IMM_TO_REG(GGL_DITHER_MASK, tx); + MOV_REG_TO_REG(Ry,ty); + AND_IMM_TO_REG(GGL_DITHER_MASK, ty); + SHL(GGL_DITHER_ORDER_SHIFT, ty); + ADD_REG_TO_REG(ty, tx); + SHL(16, parts.count.reg); + OR_REG_TO_REG(tx, parts.count.reg); + scratches.recycle(tx); + scratches.recycle(ty); + } else { + // parts.count.reg = 0xNNNN0000 + // NNNN = count-1 + SHL(16, parts.count.reg); + } + mCurSp = mCurSp - 4; + parts.count.offset_ebp = mCurSp; //ebx, esi, edi, parts.count.reg + MOV_REG_TO_MEM(parts.count.reg, parts.count.offset_ebp, EBP); + //PUSH(parts.count.reg); + recycleReg(parts.count.reg); + parts.count.reg=-1; + if (!mAllMasked) { + // compute dst ptr + comment("compute color-buffer pointer"); + const int cb_bits = mCbFormat.size*8; + int Rs = scratches.obtain(); + temp_reg = scratches.obtain(); + CONTEXT_LOAD(Rs, state.buffers.color.stride); + MOVSX_REG_TO_REG(OpndSize_16, Ry, temp_reg); + MOVSX_REG_TO_REG(OpndSize_16, Rs, Rs); + IMUL(temp_reg, Rs); + scratches.recycle(temp_reg); + ADD_REG_TO_REG(Rx, Rs); + + parts.cbPtr.setTo(obtainReg(), cb_bits); + CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data); + reg_t 
temp_reg_t; + temp_reg_t.setTo(Rs); + base_offset(parts.cbPtr, parts.cbPtr, temp_reg_t); + + mCurSp = mCurSp - 4; + parts.cbPtr.offset_ebp = mCurSp; //ebx, esi, edi, parts.count.reg, parts.cbPtr.reg + MOV_REG_TO_MEM(parts.cbPtr.reg, parts.cbPtr.offset_ebp, EBP); + //PUSH(parts.cbPtr.reg); + recycleReg(parts.cbPtr.reg); + parts.cbPtr.reg=-1; + scratches.recycle(Rs); + } + + // init fog + const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p); + if (need_fog) { + comment("compute initial fog coordinate"); + Scratch scratches(registerFile()); + int ydfdy = scratches.obtain(); + int dfdx = scratches.obtain(); + CONTEXT_LOAD(dfdx, generated_vars.dfdx); + IMUL(Rx, dfdx); + CONTEXT_LOAD(ydfdy, iterators.ydfdy); + ADD_REG_TO_REG(ydfdy, dfdx); // Rx * dfdx + ydfdy + CONTEXT_STORE(dfdx, generated_vars.f); + scratches.recycle(dfdx); + scratches.recycle(ydfdy); + } + + // init Z coordinate + if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) { + parts.z = reg_t(obtainReg()); + comment("compute initial Z coordinate"); + Scratch scratches(registerFile()); + int dzdx = scratches.obtain(); + int ydzdy = parts.z.reg; + CONTEXT_LOAD(dzdx, generated_vars.dzdx); // 1.31 fixed-point + IMUL(Rx, dzdx); + CONTEXT_LOAD(ydzdy, iterators.ydzdy); // 1.31 fixed-point + ADD_REG_TO_REG(dzdx, ydzdy); // parts.z.reg = Rx * dzdx + ydzdy + + mCurSp = mCurSp - 4; + parts.z.offset_ebp = mCurSp; //ebx, esi, edi, parts.count.reg, parts.cbPtr.reg, parts.z.reg + MOV_REG_TO_MEM(ydzdy, parts.z.offset_ebp, EBP); + //PUSH(ydzdy); + recycleReg(ydzdy); + parts.z.reg=-1; + + // we're going to index zbase of parts.count + // zbase = base + (xl-count + stride*y)*2 by arm + // !!! 
Actually, zbase = base + (xl + stride*y)*2 + int Rs = dzdx; + int zbase = scratches.obtain(); + temp_reg = zbase; + CONTEXT_LOAD(Rs, state.buffers.depth.stride); + MOVSX_REG_TO_REG(OpndSize_16, Rs, Rs); + MOV_REG_TO_REG(Ry, temp_reg); + MOVSX_REG_TO_REG(OpndSize_16, temp_reg, temp_reg); + IMUL(temp_reg, Rs); + ADD_REG_TO_REG(Rx, Rs); + // load parts.count.reg + MOV_MEM_TO_REG(parts.count.offset_ebp, EBP, temp_reg); + SHR(16, temp_reg); + ADD_REG_TO_REG(temp_reg, Rs); + SHL(1, Rs); + CONTEXT_LOAD(zbase, state.buffers.depth.data); + ADD_REG_TO_REG(Rs, zbase); + CONTEXT_STORE(zbase, generated_vars.zbase); + scratches.recycle(zbase); + scratches.recycle(dzdx); + } + // the rgisters are all used up + + // init texture coordinates + init_textures(parts.coords, reg_t(Rx), reg_t(Ry)); + scratches.recycle(Ry); + + // iterated color + init_iterated_color(parts, reg_t(Rx)); + + // init coverage factor application (anti-aliasing) + if (mAA) { + parts.covPtr.setTo(obtainReg(), 16); + CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage); + SHL(1, Rx); + ADD_REG_TO_REG(Rx, parts.covPtr.reg); + + mCurSp = mCurSp - 4; + parts.covPtr.offset_ebp = mCurSp; + MOV_REG_TO_MEM(parts.covPtr.reg, parts.covPtr.offset_ebp, EBP); + //PUSH(parts.covPtr.reg); + recycleReg(parts.covPtr.reg); + parts.covPtr.reg=-1; + } + scratches.recycle(Rx); +} + +// --------------------------------------------------------------------------- + +void GGLX86Assembler::build_component( pixel_t& pixel, + fragment_parts_t& parts, + int component, + Scratch& regs) +{ + static char const * comments[] = {"alpha", "red", "green", "blue"}; + comment(comments[component]); + + // local register file + Scratch scratches(registerFile()); + const int dst_component_size = pixel.component_size(component); + + component_t temp(-1); + build_incoming_component( temp, dst_component_size, + parts, component, scratches, regs); + + if (mInfo[component].inDest) { + // blending... 
+ build_blending( temp, mDstPixel, component, scratches ); + + // downshift component and rebuild pixel... + downshift(pixel, component, temp, parts.dither); + } +} + +void GGLX86Assembler::build_incoming_component( + component_t& temp, + int dst_size, + fragment_parts_t& parts, + int component, + Scratch& scratches, + Scratch& global_regs) +{ + const uint32_t component_mask = 1<<component; + + // Figure out what we need for the blending stage... + int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc; + int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst; + if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) { + fs = GGL_ONE; + } + + // Figure out what we need to extract and for what reason + const int blending = blending_codes(fs, fd); + + // Are we actually going to blend? + const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO)); + + // expand the source if the destination has more bits + int need_expander = false; + for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) { + texture_unit_t& tmu = mTextureMachine.tmu[i]; + if ((tmu.format_idx) && + (parts.texel[i].component_size(component) < dst_size)) { + need_expander = true; + } + } + + // do we need to extract this component? + const bool multiTexture = mTextureMachine.activeUnits > 1; + const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) && + (isAlphaSourceNeeded()); + int need_extract = mInfo[component].needed; + if (mInfo[component].inDest) + { + need_extract |= ((need_blending ? + (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander)); + need_extract |= (mTextureMachine.mask != mTextureMachine.replaced); + need_extract |= mInfo[component].smooth; + need_extract |= mInfo[component].fog; + need_extract |= mDithering; + need_extract |= multiTexture; + } + + if (need_extract) { + Scratch& regs = blend_needs_alpha_source ? 
global_regs : scratches; + component_t fragment; + + // iterated color + fragment.setTo( regs.obtain(), 0, 32, CORRUPTIBLE); + build_iterated_color(fragment, parts, component, regs); + + // texture environment (decal, modulate, replace) + build_texture_environment(fragment, parts, component, regs); + + // expand the source if the destination has more bits + if (need_expander && (fragment.size() < dst_size)) { + // we're here only if we fetched a texel + // (so we know for sure fragment is CORRUPTIBLE) + //fragment is stored on the stack + expand(fragment, fragment, dst_size); + } + + mCurSp = mCurSp - 4; + fragment.offset_ebp = mCurSp; + MOV_REG_TO_MEM(fragment.reg, fragment.offset_ebp, EBP); + regs.recycle(fragment.reg); + + // We have a few specific things to do for the alpha-channel + if ((component==GGLFormat::ALPHA) && + (mInfo[component].needed || fragment.size()<dst_size)) + { + // convert to integer_t first and make sure + // we don't corrupt a needed register + if (fragment.l) { + //component_t incoming(fragment); + // actually fragment is not corruptible + //modify(fragment, regs); + //MOV_REG_TO_REG(incoming.reg, fragment.reg); + SHR(fragment.l, fragment.offset_ebp, EBP); + fragment.h -= fragment.l; + fragment.l = 0; + } + + // I haven't found any case to trigger coverage and the following alpha test (mAlphaTest != GGL_ALWAYS) + fragment.reg = regs.obtain(); + MOV_MEM_TO_REG(fragment.offset_ebp, EBP, fragment.reg); + + // coverage factor application + build_coverage_application(fragment, parts, regs); + // alpha-test + build_alpha_test(fragment, parts); + + MOV_REG_TO_MEM(fragment.reg, fragment.offset_ebp, EBP); + regs.recycle(fragment.reg); + + if (blend_needs_alpha_source) { + // We keep only 8 bits for the blending stage + const int shift = fragment.h <= 8 ? 
0 : fragment.h-8; + + if (fragment.flags & CORRUPTIBLE) { + fragment.flags &= ~CORRUPTIBLE; + mAlphaSource.setTo(fragment.reg, + fragment.size(), fragment.flags, fragment.offset_ebp); + //mCurSp = mCurSp - 4; + //mAlphaSource.offset_ebp = mCurSp; + if (shift) { + SHR(shift, mAlphaSource.offset_ebp, EBP); + } + } else { + // XXX: it would better to do this in build_blend_factor() + // so we can avoid the extra MOV below. + mAlphaSource.setTo(regs.obtain(), + fragment.size(), CORRUPTIBLE); + mCurSp = mCurSp - 4; + mAlphaSource.offset_ebp = mCurSp; + if (shift) { + MOV_MEM_TO_REG(fragment.offset_ebp, EBP, mAlphaSource.reg); + SHR(shift, mAlphaSource.reg); + } else { + MOV_MEM_TO_REG(fragment.offset_ebp, EBP, mAlphaSource.reg); + } + MOV_REG_TO_MEM(mAlphaSource.reg, mAlphaSource.offset_ebp, EBP); + regs.recycle(mAlphaSource.reg); + } + mAlphaSource.s -= shift; + + } + } + + // fog... + build_fog( fragment, component, regs ); + + temp = fragment; + } else { + if (mInfo[component].inDest) { + // extraction not needed and replace + // we just select the right component + if ((mTextureMachine.replaced & component_mask) == 0) { + // component wasn't replaced, so use it! 
+ temp = component_t(parts.iterated, component); + } + for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) { + const texture_unit_t& tmu = mTextureMachine.tmu[i]; + if ((tmu.mask & component_mask) && + ((tmu.replaced & component_mask) == 0)) { + temp = component_t(parts.texel[i], component); + } + } + } + } +} + +bool GGLX86Assembler::isAlphaSourceNeeded() const +{ + // XXX: also needed for alpha-test + const int bs = mBlendSrc; + const int bd = mBlendDst; + return bs==GGL_SRC_ALPHA_SATURATE || + bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA || + bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ; +} + +// --------------------------------------------------------------------------- + +void GGLX86Assembler::build_smooth_shade(fragment_parts_t& parts) +{ + if (mSmooth && !parts.iterated_packed) { + // update the iterated color in a pipelined way... + comment("update iterated color"); + Scratch scratches(registerFile()); + mBuilderContext.Rctx = scratches.obtain(); + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); + + const int reload = parts.reload; + for (int i=0 ; i<4 ; i++) { + if (!mInfo[i].iterated) + continue; + + int dx = parts.argb_dx[i].reg; + int c = parts.argb[i].reg; + dx = scratches.obtain(); + c = scratches.obtain(); + CONTEXT_LOAD(dx, generated_vars.argb[i].dx); + CONTEXT_LOAD(c, generated_vars.argb[i].c); + + //if (reload & 1) { + // c = scratches.obtain(); + // CONTEXT_LOAD(c, generated_vars.argb[i].c); + //} + //if (reload & 2) { + // dx = scratches.obtain(); + // CONTEXT_LOAD(dx, generated_vars.argb[i].dx); + //} + + if (mSmooth) { + ADD_REG_TO_REG(dx, c); + } + + CONTEXT_STORE(c, generated_vars.argb[i].c); + scratches.recycle(c); + scratches.recycle(dx); + //if (reload & 1) { + // CONTEXT_STORE(c, generated_vars.argb[i].c); + // scratches.recycle(c); + //} + //if (reload & 2) { + // scratches.recycle(dx); + //} + } + scratches.recycle(mBuilderContext.Rctx); + } +} + +// --------------------------------------------------------------------------- + +void 
GGLX86Assembler::build_coverage_application(component_t& fragment, + fragment_parts_t& parts, Scratch& regs) +{ + // here fragment.l is guarenteed to be 0 + if (mAA) { + // coverages are 1.15 fixed-point numbers + comment("coverage application"); + + component_t incoming(fragment); + modify(fragment, regs); + + Scratch scratches(registerFile()); + int cf = scratches.obtain(); + parts.covPtr.reg = scratches.obtain(); + MOV_MEM_TO_REG(parts.covPtr.offset_ebp, EBP, parts.covPtr.reg); + MOVZX_MEM_TO_REG(OpndSize_16, parts.covPtr.reg, 2, cf); // refer to LDRH definition + scratches.recycle(parts.covPtr.reg); + if (fragment.h > 31) { + fragment.h--; + + int flag_push_edx = 0; + int flag_reserve_edx = 0; + int temp_reg2 = -1; + int edx_offset_ebp = 0; + if(scratches.isUsed(EDX) == 1) { + if(incoming.reg != EDX && cf != EDX) { + flag_push_edx = 1; + mCurSp = mCurSp - 4; + edx_offset_ebp = mCurSp; + MOV_REG_TO_MEM(EDX, edx_offset_ebp, EBP); + } + } + else { + flag_reserve_edx = 1; + scratches.reserve(EDX); + } + if(scratches.isUsed(EAX)) { + if( cf == EAX || incoming.reg == EAX) { + MOVSX_REG_TO_REG(OpndSize_16, cf, cf); + if(cf == EAX) + IMUL(incoming.reg); + else + IMUL(cf); + SHL(16, EDX); + SHR(16, EAX); + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); + MOV_REG_TO_REG(EDX, incoming.reg); + } + else { + int eax_offset_ebp = 0; + if(scratches.countFreeRegs() > 0) { + temp_reg2 = scratches.obtain(); + MOV_REG_TO_REG(EAX, temp_reg2); + } + else { + mCurSp = mCurSp - 4; + eax_offset_ebp = mCurSp; + MOV_REG_TO_MEM(EAX, eax_offset_ebp, EBP); + } + MOV_REG_TO_REG(cf, EAX); + MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX); + IMUL(incoming.reg); + SHL(16, EDX); + SHR(16, EAX); + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); + MOV_REG_TO_REG(EDX, incoming.reg); + if(temp_reg2 > -1) { + MOV_REG_TO_REG(temp_reg2, EAX); + scratches.recycle(temp_reg2); + } + else { + MOV_MEM_TO_REG(eax_offset_ebp, EBP, EAX); + } + } + } + else { + MOV_REG_TO_REG(cf, EAX); + MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX); + 
IMUL(incoming.reg); + SHL(16, EDX); + SHR(16, EAX); + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); + MOV_REG_TO_REG(EDX, incoming.reg); + } + if(flag_push_edx == 1) { + MOV_MEM_TO_REG(edx_offset_ebp, EBP, EDX); + } + if(flag_reserve_edx ==1) + scratches.recycle(EDX); + + MOV_REG_TO_REG(incoming.reg, fragment.reg); + + //IMUL(cf, incoming.reg); + } else { + MOV_REG_TO_REG(incoming.reg, fragment.reg); + SHL(1, fragment.reg); + + int flag_push_edx = 0; + int flag_reserve_edx = 0; + int temp_reg2 = -1; + int edx_offset_ebp = 0; + if(scratches.isUsed(EDX) == 1) { + if(fragment.reg != EDX && cf != EDX) { + flag_push_edx = 1; + mCurSp = mCurSp - 4; + edx_offset_ebp = mCurSp; + MOV_REG_TO_MEM(EDX, edx_offset_ebp, EBP); + } + } + else { + flag_reserve_edx = 1; + scratches.reserve(EDX); + } + if(scratches.isUsed(EAX)) { + if( cf == EAX || fragment.reg == EAX) { + MOVSX_REG_TO_REG(OpndSize_16, cf, cf); + if(cf == EAX) + IMUL(fragment.reg); + else + IMUL(cf); + SHL(16, EDX); + SHR(16, EAX); + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); + MOV_REG_TO_REG(EDX, fragment.reg); + } + else { + int eax_offset_ebp = 0; + if(scratches.countFreeRegs() > 0) { + temp_reg2 = scratches.obtain(); + MOV_REG_TO_REG(EAX, temp_reg2); + } + else { + mCurSp = mCurSp - 4; + eax_offset_ebp = mCurSp; + MOV_REG_TO_MEM(EAX, eax_offset_ebp, EBP); + } + MOV_REG_TO_REG(cf, EAX); + MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX); + IMUL(fragment.reg); + SHL(16, EDX); + SHR(16, EAX); + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); + MOV_REG_TO_REG(EDX, fragment.reg); + if(temp_reg2 > -1) { + MOV_REG_TO_REG(temp_reg2, EAX); + scratches.recycle(temp_reg2); + } + else { + MOV_MEM_TO_REG(eax_offset_ebp, EBP, EAX); + } + } + } + else { + MOV_REG_TO_REG(cf, EAX); + MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX); + IMUL(fragment.reg); + SHL(16, EDX); + SHR(16, EAX); + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); + MOV_REG_TO_REG(EDX, fragment.reg); + } + if(flag_push_edx == 1) { + MOV_MEM_TO_REG(edx_offset_ebp, EBP, EDX); + } + if(flag_reserve_edx ==1) 
+ scratches.recycle(EDX); + + //IMUL(cf, fragment.reg); + } + scratches.recycle(cf); + } +} + +// --------------------------------------------------------------------------- + +void GGLX86Assembler::build_alpha_test(component_t& fragment, + const fragment_parts_t& parts) +{ + if (mAlphaTest != GGL_ALWAYS) { + comment("Alpha Test"); + Scratch scratches(registerFile()); + int ref = scratches.obtain(); + mBuilderContext.Rctx = scratches.obtain(); + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); + const int shift = GGL_COLOR_BITS-fragment.size(); + CONTEXT_LOAD(ref, state.alpha_test.ref); + scratches.recycle(mBuilderContext.Rctx); + if (shift) { + SHR(shift, ref); + CMP_REG_TO_REG(ref, fragment.reg); + } else CMP_REG_TO_REG(ref, fragment.reg); + Mnemonic cc = Mnemonic_NULL; + //int cc = NV; + switch (mAlphaTest) { + case GGL_NEVER: + JMP("discard_after_textures"); + return; + break; + case GGL_LESS: + cc = Mnemonic_JNL; + break; + case GGL_EQUAL: + cc = Mnemonic_JNE; + break; + case GGL_LEQUAL: + cc = Mnemonic_JB; + break; + case GGL_GREATER: + cc = Mnemonic_JLE; + break; + case GGL_NOTEQUAL: + cc = Mnemonic_JE; + break; + case GGL_GEQUAL: + cc = Mnemonic_JNC; + break; + } + JCC(cc, "discard_after_textures"); + //B(cc^1, "discard_after_textures"); + } +} + +// --------------------------------------------------------------------------- + +void GGLX86Assembler::build_depth_test( + const fragment_parts_t& parts, uint32_t mask) +{ + mask &= Z_TEST|Z_WRITE; + int store_flag = 0; + const needs_t& needs = mBuilderContext.needs; + const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p); + Scratch scratches(registerFile()); + + if (mDepthTest != GGL_ALWAYS || zmask) { + Mnemonic ic = Mnemonic_NULL; + switch (mDepthTest) { + case GGL_LESS: + ic = Mnemonic_JBE; + break; + case GGL_EQUAL: + ic = Mnemonic_JNE; + break; + case GGL_LEQUAL: + ic = Mnemonic_JB; + break; + case GGL_GREATER: + ic = Mnemonic_JGE; + break; + case GGL_NOTEQUAL: + ic = Mnemonic_JE; + break; + case GGL_GEQUAL: + 
ic = Mnemonic_JA; + break; + case GGL_NEVER: + // this never happens, because it's taken care of when + // computing the needs. but we keep it for completness. + comment("Depth Test (NEVER)"); + JMP("discard_before_textures"); + return; + case GGL_ALWAYS: + // we're here because zmask is enabled + mask &= ~Z_TEST; // test always passes. + break; + } + + + if ((mask & Z_WRITE) && !zmask) { + mask &= ~Z_WRITE; + } + + if (!mask) + return; + + comment("Depth Test"); + + int zbase = scratches.obtain(); + mBuilderContext.Rctx = scratches.obtain(); + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); + CONTEXT_LOAD(zbase, generated_vars.zbase); // stall + scratches.recycle(mBuilderContext.Rctx); + + int temp_reg1 = scratches.obtain(); + int depth = scratches.obtain(); + int z = parts.z.reg; + MOV_MEM_TO_REG(parts.count.offset_ebp, PhysicalReg_EBP, temp_reg1); + SHR(15, temp_reg1); + SUB_REG_TO_REG(temp_reg1, zbase); + + // above does zbase = zbase + ((count >> 16) << 1) + + if (mask & Z_TEST) { + MOVZX_MEM_TO_REG(OpndSize_16, zbase, 0, depth); + MOV_MEM_TO_REG(parts.z.offset_ebp, PhysicalReg_EBP, temp_reg1); + SHR(16, temp_reg1); + CMP_REG_TO_REG(temp_reg1, depth); + JCC(ic, "discard_before_textures"); + + } + if (mask & Z_WRITE) { + if (mask == Z_WRITE) { + // only z-write asked, cc is meaningless + store_flag = 1; + } + // actually it must be stored since the above branch is not taken + MOV_REG_TO_MEM(temp_reg1, 0, zbase, OpndSize_16); + } + scratches.recycle(temp_reg1); + scratches.recycle(zbase); + scratches.recycle(depth); + } +} + +void GGLX86Assembler::build_iterate_z(const fragment_parts_t& parts) +{ + const needs_t& needs = mBuilderContext.needs; + if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) { + Scratch scratches(registerFile()); + int dzdx = scratches.obtain(); + mBuilderContext.Rctx = scratches.obtain(); + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); + CONTEXT_LOAD(dzdx, generated_vars.dzdx); // stall + 
scratches.recycle(mBuilderContext.Rctx); + ADD_REG_TO_MEM(dzdx, EBP, parts.z.offset_ebp); + scratches.recycle(dzdx); + } +} + +void GGLX86Assembler::build_iterate_f(const fragment_parts_t& parts) +{ + const needs_t& needs = mBuilderContext.needs; + if (GGL_READ_NEEDS(P_FOG, needs.p)) { + Scratch scratches(registerFile()); + int dfdx = scratches.obtain(); + int f = scratches.obtain(); + mBuilderContext.Rctx = scratches.obtain(); + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); + CONTEXT_LOAD(f, generated_vars.f); + CONTEXT_LOAD(dfdx, generated_vars.dfdx); // stall + ADD_REG_TO_REG(dfdx, f); + CONTEXT_STORE(f, generated_vars.f); + scratches.recycle(mBuilderContext.Rctx); + scratches.recycle(dfdx); + scratches.recycle(f); + } +} + +// --------------------------------------------------------------------------- + +void GGLX86Assembler::build_logic_op(pixel_t& pixel, Scratch& regs) +{ + const needs_t& needs = mBuilderContext.needs; + const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR; + if (opcode == GGL_COPY) + return; + + comment("logic operation"); + + pixel_t s(pixel); + if (!(pixel.flags & CORRUPTIBLE)) { + pixel.reg = regs.obtain(); + pixel.flags |= CORRUPTIBLE; + } + + pixel_t d(mDstPixel); + d.reg = regs.obtain(); + MOV_MEM_TO_REG(mDstPixel.offset_ebp, EBP, d.reg); + switch(opcode) { + case GGL_CLEAR: + MOV_IMM_TO_REG(0, pixel.reg); + break; + case GGL_AND: + MOV_REG_TO_REG(d.reg, pixel.reg); + AND_REG_TO_REG(s.reg, pixel.reg); + break; + case GGL_AND_REVERSE: + MOV_REG_TO_REG(d.reg, pixel.reg); + NOT(pixel.reg); + AND_REG_TO_REG(s.reg, pixel.reg); + break; + case GGL_COPY: + break; + case GGL_AND_INVERTED: + MOV_REG_TO_REG(s.reg, pixel.reg); + NOT(pixel.reg); + AND_REG_TO_REG(d.reg, pixel.reg); + break; + case GGL_NOOP: + MOV_REG_TO_REG(d.reg, pixel.reg); + break; + case GGL_XOR: + MOV_REG_TO_REG(d.reg, pixel.reg); + XOR(s.reg, pixel.reg); + break; + case GGL_OR: + MOV_REG_TO_REG(d.reg, pixel.reg); + OR_REG_TO_REG(s.reg, pixel.reg); + break; + case 
GGL_NOR: + MOV_REG_TO_REG(d.reg, pixel.reg); + OR_REG_TO_REG(s.reg, pixel.reg); + NOT(pixel.reg); + break; + case GGL_EQUIV: + MOV_REG_TO_REG(d.reg, pixel.reg); + XOR(s.reg, pixel.reg); + NOT(pixel.reg); + break; + case GGL_INVERT: + MOV_REG_TO_REG(d.reg, pixel.reg); + NOT(pixel.reg); + break; + case GGL_OR_REVERSE: // s | ~d == ~(~s & d) + MOV_REG_TO_REG(s.reg, pixel.reg); + NOT(pixel.reg); + AND_REG_TO_REG(d.reg, pixel.reg); + NOT(pixel.reg); + break; + case GGL_COPY_INVERTED: + MOV_REG_TO_REG(s.reg, pixel.reg); + NOT(pixel.reg); + break; + case GGL_OR_INVERTED: // ~s | d == ~(s & ~d) + MOV_REG_TO_REG(d.reg, pixel.reg); + NOT(pixel.reg); + AND_REG_TO_REG(s.reg, pixel.reg); + NOT(pixel.reg); + break; + case GGL_NAND: + MOV_REG_TO_REG(d.reg, pixel.reg); + AND_REG_TO_REG(s.reg, pixel.reg); + NOT(pixel.reg); + break; + case GGL_SET: + MOV_IMM_TO_REG(0, pixel.reg); + NOT(pixel.reg); + break; + }; + regs.recycle(d.reg); +} + +// --------------------------------------------------------------------------- + + +void GGLX86Assembler::build_and_immediate(int d, int s, uint32_t mask, int bits) +{ + uint32_t rot; + uint32_t size = ((bits>=32) ? 
0 : (1LU << bits)) - 1; + mask &= size; + + if (mask == size) { + if (d != s) + MOV_REG_TO_REG(s, d); + return; + } + + MOV_REG_TO_REG(s, d); + AND_IMM_TO_REG(mask, d); +} + +void GGLX86Assembler::build_masking(pixel_t& pixel, Scratch& regs) +{ + if (!mMasking || mAllMasked) { + return; + } + + comment("color mask"); + + pixel_t fb(mDstPixel); + fb.reg = regs.obtain(); + MOV_MEM_TO_REG(mDstPixel.offset_ebp, EBP, fb.reg); + pixel_t s(pixel); + if (!(pixel.flags & CORRUPTIBLE)) { + pixel.reg = regs.obtain(); + pixel.flags |= CORRUPTIBLE; + } + + int mask = 0; + for (int i=0 ; i<4 ; i++) { + const int component_mask = 1<<i; + const int h = fb.format.c[i].h; + const int l = fb.format.c[i].l; + if (h && (!(mMasking & component_mask))) { + mask |= ((1<<(h-l))-1) << l; + } + } + + // There is no need to clear the masked components of the source + // (unless we applied a logic op), because they're already zeroed + // by construction (masked components are not computed) + + if (mLogicOp) { + const needs_t& needs = mBuilderContext.needs; + const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR; + if (opcode != GGL_CLEAR) { + // clear masked component of source + build_and_immediate(pixel.reg, s.reg, mask, fb.size()); + s = pixel; + } + } + + // clear non masked components of destination + build_and_immediate(fb.reg, fb.reg, ~mask, fb.size()); + + // or back the channels that were masked + if (s.reg == fb.reg) { + // this is in fact a MOV + if (s.reg == pixel.reg) { + // ugh. 
this in in fact a nop + } else { + MOV_REG_TO_REG(fb.reg, pixel.reg); + } + } else { + MOV_REG_TO_REG(fb.reg, pixel.reg); + OR_REG_TO_REG(s.reg, pixel.reg); + } + MOV_REG_TO_MEM(fb.reg, mDstPixel.offset_ebp, EBP); +} + +// --------------------------------------------------------------------------- + +void GGLX86Assembler::base_offset(pointer_t& d, pointer_t& b, const reg_t& o) +{ +// d and b are the same reference + Scratch scratches(registerFile()); + int temp_reg = scratches.obtain(); + switch (b.size) { + case 32: + MOV_REG_TO_REG(b.reg, temp_reg); + MOV_REG_TO_REG(o.reg, d.reg); + SHL(2,d.reg); + ADD_REG_TO_REG(temp_reg, d.reg); + break; + case 24: + if (d.reg == b.reg) { + MOV_REG_TO_REG(b.reg, temp_reg); + MOV_REG_TO_REG(o.reg, d.reg); + SHL(1,d.reg); + ADD_REG_TO_REG(temp_reg, d.reg); + ADD_REG_TO_REG(o.reg, d.reg); + } else { + MOV_REG_TO_REG(o.reg, temp_reg); + SHL(1,temp_reg); + MOV_REG_TO_REG(temp_reg, d.reg); + ADD_REG_TO_REG(o.reg, d.reg); + ADD_REG_TO_REG(b.reg, d.reg); + } + break; + case 16: + MOV_REG_TO_REG(b.reg, temp_reg); + MOV_REG_TO_REG(o.reg, d.reg); + SHL(1,d.reg); + ADD_REG_TO_REG(temp_reg, d.reg); + break; + case 8: + MOV_REG_TO_REG(b.reg, temp_reg); + MOV_REG_TO_REG(o.reg, d.reg); + ADD_REG_TO_REG(temp_reg, d.reg); + break; + } + scratches.recycle(temp_reg); +} + +// ---------------------------------------------------------------------------- +// cheezy register allocator... 
+// ---------------------------------------------------------------------------- + +void X86RegisterAllocator::reset() +{ + mRegs.reset(); +} + +int X86RegisterAllocator::reserveReg(int reg) +{ + return mRegs.reserve(reg); +} + +int X86RegisterAllocator::obtainReg() +{ + return mRegs.obtain(); +} + +void X86RegisterAllocator::recycleReg(int reg) +{ + mRegs.recycle(reg); +} + +X86RegisterAllocator::RegisterFile& X86RegisterAllocator::registerFile() +{ + return mRegs; +} + +// ---------------------------------------------------------------------------- + +X86RegisterAllocator::RegisterFile::RegisterFile() + : mRegs(0), mTouched(0), mStatus(0) +{ + //reserve(PhysicalReg_EBP); + //reserve(PhysicalReg_ESP); +} + +X86RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs) + : mRegs(rhs.mRegs), mTouched(rhs.mTouched) +{ +} + +X86RegisterAllocator::RegisterFile::~RegisterFile() +{ +} + +bool X86RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const +{ + return (mRegs == rhs.mRegs); +} + +void X86RegisterAllocator::RegisterFile::reset() +{ + mRegs = mTouched = mStatus = 0; +} + +int X86RegisterAllocator::RegisterFile::reserve(int reg) +{ + LOG_ALWAYS_FATAL_IF(isUsed(reg), + "reserving register %d, but already in use", + reg); + if(isUsed(reg)) return -1; + mRegs |= (1<<reg); + mTouched |= mRegs; + return reg; +} + +void X86RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask) +{ + mRegs |= regMask; + mTouched |= regMask; +} + +int X86RegisterAllocator::RegisterFile::isUsed(int reg) const +{ + LOG_ALWAYS_FATAL_IF(reg>=6, "invalid register %d", reg); + return mRegs & (1<<reg); +} + +int X86RegisterAllocator::RegisterFile::obtain() +{ +//multiplication result is in edx:eax +//ebx, ecx, edi, esi, eax, edx + const char priorityList[6] = { PhysicalReg_EBX, PhysicalReg_ECX,PhysicalReg_EDI, PhysicalReg_ESI, PhysicalReg_EAX, PhysicalReg_EDX }; + + const int nbreg = sizeof(priorityList); + int i, r; + for (i=0 ; i<nbreg ; i++) { + r = 
priorityList[i]; + if (!isUsed(r)) { + break; + } + } + // this is not an error anymore because, we'll try again with + // a lower optimization level. + ALOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n"); + if (i >= nbreg) { + mStatus |= OUT_OF_REGISTERS; + // we return SP so we can more easily debug things + // the code will never be run anyway. + printf("pixelflinger ran out of registers\n"); + return PhysicalReg_ESP; + //return -1; + } + reserve(r); + return r; +} + +bool X86RegisterAllocator::RegisterFile::hasFreeRegs() const +{ + return ((mRegs & 0x3F) == 0x3F) ? false : true; +} + +int X86RegisterAllocator::RegisterFile::countFreeRegs() const +{ + int f = ~mRegs & 0x3F; + // now count number of 1 + f = (f & 0x5555) + ((f>>1) & 0x5555); + f = (f & 0x3333) + ((f>>2) & 0x3333); + f = (f & 0x0F0F) + ((f>>4) & 0x0F0F); + f = (f & 0x00FF) + ((f>>8) & 0x00FF); + return f; +} + +void X86RegisterAllocator::RegisterFile::recycle(int reg) +{ + LOG_FATAL_IF(!isUsed(reg), + "recycling unallocated register %d", + reg); + mRegs &= ~(1<<reg); +} + +void X86RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask) +{ + LOG_FATAL_IF((mRegs & regMask)!=regMask, + "recycling unallocated registers " + "(recycle=%08x, allocated=%08x, unallocated=%08x)", + regMask, mRegs, mRegs®Mask); + mRegs &= ~regMask; +} + +uint32_t X86RegisterAllocator::RegisterFile::touched() const +{ + return mTouched; +} + +// ---------------------------------------------------------------------------- + +}; // namespace android diff --git a/libpixelflinger/codeflinger/x86/GGLX86Assembler.h b/libpixelflinger/codeflinger/x86/GGLX86Assembler.h new file mode 100644 index 0000000..1960cfc --- /dev/null +++ b/libpixelflinger/codeflinger/x86/GGLX86Assembler.h @@ -0,0 +1,563 @@ +/* libs/pixelflinger/codeflinger/x86/GGLX86Assembler.h +** +** Copyright 2006, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in 
compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +*/ + + +#ifndef ANDROID_GGLX86ASSEMBLER_H +#define ANDROID_GGLX86ASSEMBLER_H + +#include <stdint.h> +#include <sys/types.h> + +#include <private/pixelflinger/ggl_context.h> + +#include "codeflinger/x86/X86Assembler.h" + + +namespace android { + +// ---------------------------------------------------------------------------- + +#define CONTEXT_LOAD(REG, FIELD) \ + MOV_MEM_TO_REG(GGL_OFFSETOF(FIELD), mBuilderContext.Rctx, REG) + +#define CONTEXT_STORE(REG, FIELD) \ + MOV_REG_TO_MEM(REG, GGL_OFFSETOF(FIELD), mBuilderContext.Rctx) + +class X86RegisterAllocator +{ +public: + class RegisterFile; + + RegisterFile& registerFile(); + int reserveReg(int reg); + int obtainReg(); + void recycleReg(int reg); + void reset(); + + class RegisterFile + { + public: + RegisterFile(); + RegisterFile(const RegisterFile& rhs); + ~RegisterFile(); + + void reset(); + + bool operator == (const RegisterFile& rhs) const; + bool operator != (const RegisterFile& rhs) const { + return !operator == (rhs); + } + + int reserve(int reg); + void reserveSeveral(uint32_t regMask); + + void recycle(int reg); + void recycleSeveral(uint32_t regMask); + + int obtain(); + inline int isUsed(int reg) const; + + bool hasFreeRegs() const; + int countFreeRegs() const; + + uint32_t touched() const; + inline uint32_t status() const { return mStatus; } + + enum { + OUT_OF_REGISTERS = 0x1 + }; + + private: + uint32_t mRegs; + uint32_t mTouched; + uint32_t mStatus; + }; + + class Scratch + { + public: + Scratch(RegisterFile& regFile) + : mRegFile(regFile), mScratch(0) { 
+ } + ~Scratch() { + mRegFile.recycleSeveral(mScratch); + } + int obtain() { + int reg = mRegFile.obtain(); + mScratch |= 1<<reg; + return reg; + } + void reserve(int reg) { + mRegFile.reserve(reg); + mScratch |= 1<<reg; + } + void recycle(int reg) { + mRegFile.recycle(reg); + mScratch &= ~(1<<reg); + } + bool isUsed(int reg) { + return (mScratch & (1<<reg)); + } + int countFreeRegs() { + return mRegFile.countFreeRegs(); + } + private: + RegisterFile& mRegFile; + uint32_t mScratch; + }; + +/* +// currently we don't use it + + class Spill + { + public: + Spill(RegisterFile& regFile, X86Assembler& gen, uint32_t reglist) + : mRegFile(regFile), mGen(gen), mRegList(reglist), mCount(0) + { + if (reglist) { + int count = 0; + while (reglist) { + count++; + reglist &= ~(1 << (31 - __builtin_clz(reglist))); + } + if (count == 1) { + int reg = 31 - __builtin_clz(mRegList); + // move to the stack + } else { + // move to the stack + } + mRegFile.recycleSeveral(mRegList); + mCount = count; + } + } + ~Spill() { + if (mRegList) { + if (mCount == 1) { + int reg = 31 - __builtin_clz(mRegList); + // move to the stack + } else { + } + mRegFile.reserveSeveral(mRegList); + } + } + private: + RegisterFile& mRegFile; + X86Assembler& mGen; + uint32_t mRegList; + int mCount; + }; +*/ + +private: + RegisterFile mRegs; +}; + +// ---------------------------------------------------------------------------- + +class GGLX86Assembler : public X86Assembler, public X86RegisterAllocator +{ +public: + + GGLX86Assembler(const sp<Assembly>& assembly); + ~GGLX86Assembler(); + + char* base() const { return 0; } // XXX + char* pc() const { return 0; } // XXX + + void reset(int opt_level); + + + // generate scanline code for given needs + int scanline(const needs_t& needs, context_t const* c); + int scanline_core(const needs_t& needs, context_t const* c); + + enum { + CLEAR_LO = 0x0001, + CLEAR_HI = 0x0002, + CORRUPTIBLE = 0x0004, + FIRST = 0x0008 + }; + + enum { //load/store flags + WRITE_BACK = 0x0001 + 
}; + + struct reg_t { + reg_t() : reg(-1), flags(0), offset_ebp(0) { + } + reg_t(int r, int f=0, int offset=0) + : reg(r), flags(f), offset_ebp(offset) { + } + void setTo(int r, int f=0, int offset=0) { + reg=r; flags=f; offset_ebp=offset; + } + int reg; + uint16_t flags; + int offset_ebp; + }; + + struct integer_t : public reg_t { + integer_t() : reg_t(), s(0) { + } + integer_t(int r, int sz=32, int f=0, int offset=0) + : reg_t(r, f, offset), s(sz) { + } + void setTo(int r, int sz=32, int f=0, int offset=0) { + reg_t::setTo(r, f, offset); s=sz; + } + int8_t s; + inline int size() const { return s; } + }; + + struct pixel_t : public reg_t { + pixel_t() : reg_t() { + memset(&format, 0, sizeof(GGLFormat)); + } + pixel_t(int r, const GGLFormat* fmt, int f=0, int offset=0) + : reg_t(r, f, offset), format(*fmt) { + } + void setTo(int r, const GGLFormat* fmt, int f=0, int offset=0) { + reg_t::setTo(r, f, offset); format = *fmt; + } + GGLFormat format; + inline int hi(int c) const { return format.c[c].h; } + inline int low(int c) const { return format.c[c].l; } + inline int mask(int c) const { return ((1<<size(c))-1) << low(c); } + inline int size() const { return format.size*8; } + inline int size(int c) const { return component_size(c); } + inline int component_size(int c) const { return hi(c) - low(c); } + }; + + struct component_t : public reg_t { + component_t() : reg_t(), h(0), l(0) { + } + component_t(int r, int f=0, int offset=0) + : reg_t(r, f, offset), h(0), l(0) { + } + component_t(int r, int lo, int hi, int f=0, int offset=0) + : reg_t(r, f, offset), h(hi), l(lo) { + } + explicit component_t(const integer_t& rhs) + : reg_t(rhs.reg, rhs.flags, rhs.offset_ebp), h(rhs.s), l(0) { + } + explicit component_t(const pixel_t& rhs, int component) { + setTo( rhs.reg, + rhs.format.c[component].l, + rhs.format.c[component].h, + rhs.flags|CLEAR_LO|CLEAR_HI, rhs.offset_ebp); + } + void setTo(int r, int lo=0, int hi=0, int f=0, int offset=0) { + reg_t::setTo(r, f, offset); 
h=hi; l=lo; + } + int8_t h; + int8_t l; + inline int size() const { return h-l; } + }; + + struct pointer_t : public reg_t { + pointer_t() : reg_t(), size(0) { + } + pointer_t(int r, int s, int f=0, int offset=0) + : reg_t(r, f, offset), size(s) { + } + void setTo(int r, int s, int f=0, int offset=0) { + reg_t::setTo(r, f, offset); size=s; + } + int8_t size; + }; + + +private: + struct tex_coord_t { + reg_t s; + reg_t t; + pointer_t ptr; + }; + + struct fragment_parts_t { + uint32_t packed : 1; + uint32_t reload : 2; + uint32_t iterated_packed : 1; + pixel_t iterated; + pointer_t cbPtr; + pointer_t covPtr; + reg_t count; + reg_t argb[4]; + reg_t argb_dx[4]; + reg_t z; + reg_t dither; + pixel_t texel[GGL_TEXTURE_UNIT_COUNT]; + tex_coord_t coords[GGL_TEXTURE_UNIT_COUNT]; + }; + + struct texture_unit_t { + int format_idx; + GGLFormat format; + int bits; + int swrap; + int twrap; + int env; + int pot; + int linear; + uint8_t mask; + uint8_t replaced; + }; + + struct texture_machine_t { + texture_unit_t tmu[GGL_TEXTURE_UNIT_COUNT]; + uint8_t mask; + uint8_t replaced; + uint8_t directTexture; + uint8_t activeUnits; + }; + + struct component_info_t { + bool masked : 1; + bool inDest : 1; + bool needed : 1; + bool replaced : 1; + bool iterated : 1; + bool smooth : 1; + bool blend : 1; + bool fog : 1; + }; + + struct builder_context_t { + context_t const* c; + needs_t needs; + int Rctx; + }; + + template <typename T> + void modify(T& r, Scratch& regs) + { + if (!(r.flags & CORRUPTIBLE)) { + r.reg = regs.obtain(); + r.flags |= CORRUPTIBLE; + } + } + + // helpers + void base_offset(pointer_t& d, pointer_t& b, const reg_t& o); + + // texture environement + void modulate( component_t& dest, + const component_t& incoming, + const pixel_t& texel, int component); + + void decal( component_t& dest, + const component_t& incoming, + const pixel_t& texel, int component); + + void blend( component_t& dest, + const component_t& incoming, + const pixel_t& texel, int component, int tmu); 
+ + void add( component_t& dest, + const component_t& incoming, + const pixel_t& texel, int component); + + // load/store stuff + void store(const pointer_t& addr, const pixel_t& src, uint32_t flags=0); + void load(pointer_t& addr, const pixel_t& dest, uint32_t flags=0); + + void extract(integer_t& d, const pixel_t& s, int component); + void extract(component_t& d, const pixel_t& s, int component); + void extract(integer_t& d, int s, int h, int l, int bits=32); + void expand(integer_t& d, const integer_t& s, int dbits); + void expand(integer_t& d, const component_t& s, int dbits); + void expand(component_t& d, const component_t& s, int dbits); + void downshift(pixel_t& d, int component, component_t s, reg_t& dither); + + + void mul_factor( component_t& d, + const integer_t& v, + const integer_t& f, Scratch& scratches); + + void mul_factor_add( component_t& d, + const integer_t& v, + const integer_t& f, + const component_t& a); + + void component_add( component_t& d, + const integer_t& dst, + const integer_t& src); + + void component_sat( const component_t& v, const int temp_reg); + + + void build_scanline_preparation(fragment_parts_t& parts, + const needs_t& needs); + + void build_smooth_shade(fragment_parts_t& parts); + + void build_component( pixel_t& pixel, + fragment_parts_t& parts, + int component, + Scratch& global_scratches); + + void build_incoming_component( + component_t& temp, + int dst_size, + fragment_parts_t& parts, + int component, + Scratch& scratches, + Scratch& global_scratches); + + void init_iterated_color(fragment_parts_t& parts, const reg_t& x); + + void build_iterated_color( component_t& fragment, + fragment_parts_t& parts, + int component, + Scratch& regs); + + void decodeLogicOpNeeds(const needs_t& needs); + + void decodeTMUNeeds(const needs_t& needs, context_t const* c); + + void init_textures( tex_coord_t* coords, + const reg_t& x, + const reg_t& y); + + void build_textures( fragment_parts_t& parts, + Scratch& regs); + + void filter8( 
const fragment_parts_t& parts, + pixel_t& texel, const texture_unit_t& tmu, + reg_t reg_U, reg_t reg_V, pointer_t& txPtr, + int FRAC_BITS, Scratch& scratches); + + void filter16( const fragment_parts_t& parts, + pixel_t& texel, const texture_unit_t& tmu, + reg_t reg_U, reg_t reg_V, pointer_t& txPtr, + int FRAC_BITS, Scratch& scratches); + + void filter24( const fragment_parts_t& parts, + pixel_t& texel, const texture_unit_t& tmu, + int U, int V, pointer_t& txPtr, + int FRAC_BITS); + + void filter32( const fragment_parts_t& parts, + pixel_t& texel, const texture_unit_t& tmu, + reg_t reg_U, reg_t reg_V, pointer_t& txPtr, + int FRAC_BITS, Scratch& scratches); + + void build_texture_environment( component_t& fragment, + fragment_parts_t& parts, + int component, + Scratch& regs); + + void wrapping( int d, + int coord, int size, + int tx_wrap, int tx_linear, Scratch& scratches); + + void build_fog( component_t& temp, + int component, + Scratch& parent_scratches); + + void build_blending( component_t& in_out, + pixel_t& pixel, + int component, + Scratch& parent_scratches); + + void build_blend_factor( + integer_t& factor, int f, int component, + const pixel_t& dst_pixel, + integer_t& fragment, + integer_t& fb, + Scratch& scratches); + + void build_blendFOneMinusF( component_t& temp, + const integer_t& factor, + const integer_t& fragment, + const integer_t& fb); + + void build_blendOneMinusFF( component_t& temp, + const integer_t& factor, + const integer_t& fragment, + const integer_t& fb); + + void build_coverage_application(component_t& fragment, + fragment_parts_t& parts, + Scratch& regs); + + void build_alpha_test(component_t& fragment, const fragment_parts_t& parts); + + enum { Z_TEST=1, Z_WRITE=2 }; + void build_depth_test(const fragment_parts_t& parts, uint32_t mask); + void build_iterate_z(const fragment_parts_t& parts); + void build_iterate_f(const fragment_parts_t& parts); + void build_iterate_texture_coordinates(const fragment_parts_t& parts); + + void 
build_logic_op(pixel_t& pixel, Scratch& regs); + + void build_masking(pixel_t& pixel, Scratch& regs); + + void build_and_immediate(int d, int s, uint32_t mask, int bits); + + bool isAlphaSourceNeeded() const; + + enum { + FACTOR_SRC=1, FACTOR_DST=2, BLEND_SRC=4, BLEND_DST=8 + }; + + enum { + LOGIC_OP=1, LOGIC_OP_SRC=2, LOGIC_OP_DST=4 + }; + + static int blending_codes(int fs, int fd); + + builder_context_t mBuilderContext; + texture_machine_t mTextureMachine; + component_info_t mInfo[4]; + int mBlending; + int mMasking; + int mAllMasked; + int mLogicOp; + int mAlphaTest; + int mAA; + int mDithering; + int mDepthTest; + + int mSmooth; + int mFog; + pixel_t mDstPixel; + + GGLFormat mCbFormat; + + int mBlendFactorCached; + integer_t mAlphaSource; + + int mBaseRegister; + + int mBlendSrc; + int mBlendDst; + int mBlendSrcA; + int mBlendDstA; + + int mOptLevel; + + // to stretch esp and shrink esp + int mCurSp; +}; + +// ---------------------------------------------------------------------------- + +}; // namespace android + +#endif // ANDROID_GGLX86ASSEMBLER_H diff --git a/libpixelflinger/codeflinger/x86/X86Assembler.cpp b/libpixelflinger/codeflinger/x86/X86Assembler.cpp new file mode 100644 index 0000000..2a717ac --- /dev/null +++ b/libpixelflinger/codeflinger/x86/X86Assembler.cpp @@ -0,0 +1,618 @@ +/* libs/pixelflinger/codeflinger/x86/X86Assembler.cpp +** +** Copyright 2006, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. 
+*/ + +#define LOG_TAG "X86Assembler" + +#include <stdio.h> +#include <stdlib.h> +#include <cutils/log.h> +#include <cutils/properties.h> +#include <string.h> + +#if defined(WITH_LIB_HARDWARE) +#include <hardware_legacy/qemu_tracing.h> +#endif + +#include <private/pixelflinger/ggl_context.h> + +#include "codeflinger/CodeCache.h" +#include "codeflinger/x86/X86Assembler.h" + +// ---------------------------------------------------------------------------- + +namespace android { + +// ---------------------------------------------------------------------------- + +X86Assembler::X86Assembler(const sp<Assembly>& assembly) + : mAssembly(assembly) +{ + mBase = mStream = (char *)assembly->base(); + mDuration = ggl_system_time(); +#if defined(WITH_LIB_HARDWARE) + mQemuTracing = true; +#endif +} + +X86Assembler::~X86Assembler() +{ +} + +char* X86Assembler::pc() const +{ + return mStream; +} + +char* X86Assembler::base() const +{ + return mBase; +} + +void X86Assembler::reset() +{ + mBase = mStream = (char *)mAssembly->base(); + mBranchTargets.clear(); + mLabels.clear(); + mLabelsInverseMapping.clear(); + mComments.clear(); +} + +// ---------------------------------------------------------------------------- + +void X86Assembler::disassemble(const char* name) +{ + if (name) { + printf("%s:\n", name); + } + size_t count = pc()-base(); + unsigned insLength; + unsigned insSize; + char* curStream = (char*)base(); + while (count>0) { + ssize_t label = mLabelsInverseMapping.indexOfKey(curStream); + if (label >= 0) { + printf("%s:\n", mLabelsInverseMapping.valueAt(label)); + } + ssize_t comment = mComments.indexOfKey(curStream); + if (comment >= 0) { + printf("; %s\n", mComments.valueAt(comment)); + } + insLength = decodeThenPrint(curStream); + curStream = curStream + insLength; + count = count - insLength; + } +} + +void X86Assembler::comment(const char* string) +{ + mComments.add(mStream, string); +} + +void X86Assembler::label(const char* theLabel) +{ + mLabels.add(theLabel, 
mStream); + mLabelsInverseMapping.add(mStream, theLabel); +} + +//the conditional jump +void X86Assembler::JCC(Mnemonic cc, const char* label) { + switch (cc) { + case Mnemonic_JO: + encoder_imm(Mnemonic_JO, OpndSize_32, 0/*imm*/, mStream); + break; + case Mnemonic_JNO: + encoder_imm(Mnemonic_JNO, OpndSize_32, 0/*imm*/, mStream); + break; + case Mnemonic_JB: + encoder_imm(Mnemonic_JB, OpndSize_32, 0/*imm*/, mStream); + break; + case Mnemonic_JNB: + encoder_imm(Mnemonic_JNB, OpndSize_32, 0/*imm*/, mStream); + break; + case Mnemonic_JZ: + encoder_imm(Mnemonic_JZ, OpndSize_32, 0/*imm*/, mStream); + break; + case Mnemonic_JNZ: + encoder_imm(Mnemonic_JNZ, OpndSize_32, 0/*imm*/, mStream); + break; + case Mnemonic_JBE: + encoder_imm(Mnemonic_JBE, OpndSize_32, 0/*imm*/, mStream); + break; + case Mnemonic_JNBE: + encoder_imm(Mnemonic_JNBE, OpndSize_32, 0/*imm*/, mStream); + break; + case Mnemonic_JS: + encoder_imm(Mnemonic_JS, OpndSize_32, 0/*imm*/, mStream); + break; + case Mnemonic_JNS: + encoder_imm(Mnemonic_JNS, OpndSize_32, 0/*imm*/, mStream); + break; + case Mnemonic_JP: + encoder_imm(Mnemonic_JP, OpndSize_32, 0/*imm*/, mStream); + break; + case Mnemonic_JNP: + encoder_imm(Mnemonic_JNP, OpndSize_32, 0/*imm*/, mStream); + break; + case Mnemonic_JL: + encoder_imm(Mnemonic_JL, OpndSize_32, 0/*imm*/, mStream); + break; + case Mnemonic_JNL: + encoder_imm(Mnemonic_JNL, OpndSize_32, 0/*imm*/, mStream); + break; + case Mnemonic_JLE: + encoder_imm(Mnemonic_JLE, OpndSize_32, 0/*imm*/, mStream); + break; + case Mnemonic_JNLE: + encoder_imm(Mnemonic_JNLE, OpndSize_32, 0/*imm*/, mStream); + break; + default : + printf("the condition is not supported.\n"); + return; + } + mStreamNext = mStream + encoder_get_inst_size(mStream); + //the offset is relative to the next instruction of the current PC + mBranchTargets.add(branch_target_t(label, mStream, mStreamNext)); + mStream = mStreamNext; +} + +void X86Assembler::JMP(const char* label) { + encoder_imm(Mnemonic_JMP, OpndSize_32, 
0/*imm*/, mStream); + mStreamNext = mStream + encoder_get_inst_size(mStream); + mBranchTargets.add(branch_target_t(label, mStream, mStreamNext)); + mStream = mStreamNext; +} + +void X86Assembler::prepare_esp(int old_offset) +{ + mStreamUpdate = mStream; + SUB_IMM_TO_REG(old_offset, ESP); +} + +void X86Assembler::update_esp(int new_offset) +{ + encoder_update_imm_rm(new_offset, mStreamUpdate); +} + +void X86Assembler::shrink_esp(int shrink_offset) +{ + ADD_IMM_TO_REG(shrink_offset, ESP); +} + +void X86Assembler::callee_work() +{ + //push EBX, ESI, EDI which need to be done in callee + /* + push %ebp + mov %esp,%ebp + push %ebx + push %esi + push %edi + */ + PUSH(EBP); + MOV_REG_TO_REG(ESP, EBP); + PUSH(EBX); + PUSH(ESI); + PUSH(EDI); +} + +void X86Assembler::return_work() +{ +// pop %esi +// pop %edi +// pop %ebx +// movl %ebp,%esp +// pop %ebp +// ret +// ret is equivalent to below +// pop %eax // the return address +// jmp *%eax + POP(EDI); + POP(ESI); + POP(EBX); + POP(EBP); + encoder_return(mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +int X86Assembler::generate(const char* name) +{ + // fixup all the branches + size_t count = mBranchTargets.size(); + while (count--) { + const branch_target_t& bt = mBranchTargets[count]; + char* target_pc = mLabels.valueFor(bt.label); + LOG_ALWAYS_FATAL_IF(!target_pc, + "error resolving branch targets, target_pc is null"); + //the offset is relative to the next instruction of the current PC + int32_t offset = int32_t(target_pc - bt.next_pc); + encoder_update_imm(offset, bt.pc); + } + + mAssembly->resize((int)(pc()-base())); + + // the instruction cache is flushed by CodeCache + const int64_t duration = ggl_system_time() - mDuration; + const char * const format = "generated %s (%d ins size) at [%p:%p] in %lld ns\n"; + ALOGI(format, name, int(pc()-base()), base(), pc(), duration); + +#if defined(WITH_LIB_HARDWARE) + if (__builtin_expect(mQemuTracing, 0)) { + int err = qemu_add_mapping(uintptr_t(base()), 
name); + mQemuTracing = (err >= 0); + } +#endif + + char value[PROPERTY_VALUE_MAX]; + property_get("debug.pf.disasm", value, "0"); + if (atoi(value) != 0) { + printf(format, name, int(pc()-base()), base(), pc(), duration); + disassemble(name); + } + + return NO_ERROR; +} + +char* X86Assembler::pcForLabel(const char* label) +{ + return mLabels.valueFor(label); +} + +// ---------------------------------------------------------------------------- + +void X86Assembler::PUSH(int reg) { + encoder_reg(Mnemonic_PUSH, OpndSize_32, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::POP(int reg) { + encoder_reg(Mnemonic_POP, OpndSize_32, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +//arithmetic +void X86Assembler::ADD_REG_TO_REG(int src, int dst) { + encoder_reg_reg(Mnemonic_ADD, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::ADD_IMM_TO_REG(int imm, int dst) { + encoder_imm_reg(Mnemonic_ADD, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::ADD_IMM_TO_MEM(int imm, int disp, int dst) { + encoder_imm_mem(Mnemonic_ADD, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::ADD_MEM_TO_REG(int base_reg, int disp, int dst) { + encoder_mem_reg(Mnemonic_ADD, OpndSize_32, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/,LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::ADD_REG_TO_MEM(int src, int base_reg, int disp) { + encoder_reg_mem(Mnemonic_ADD, OpndSize_32, src, 0/*isPhysical*/, disp, base_reg, 0/*isBasePhysical*/, LowOpndRegType_gp, mStream); + 
mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::SUB_REG_TO_REG(int src, int dst) { + encoder_reg_reg(Mnemonic_SUB, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::SUB_IMM_TO_REG(int imm, int dst) { + encoder_imm_reg(Mnemonic_SUB, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::SUB_IMM_TO_MEM(int imm, int disp, int dst) { + encoder_imm_mem(Mnemonic_SUB, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::SUB_REG_TO_MEM(int src, int base_reg, int disp) { + encoder_reg_mem(Mnemonic_SUB, OpndSize_32, src, 0/*isPhysical*/, disp, base_reg, 0/*isBasePhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +//test +void X86Assembler::TEST_REG_TO_REG(int src, int dst, OpndSize size) { + encoder_reg_reg(Mnemonic_TEST, size, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +//compare +void X86Assembler::CMP_REG_TO_REG(int src, int dst, OpndSize size) { + encoder_reg_reg(Mnemonic_CMP, size, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::CMP_IMM_TO_REG(int imm, int dst) { + encoder_imm_reg(Mnemonic_CMP, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::CMP_MEM_TO_REG(int base_reg, int disp, int dst, OpndSize size) { + encoder_mem_reg(Mnemonic_CMP, size, disp, base_reg, 0/*isBasePhysical*/, + dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream 
= mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::CMP_REG_TO_MEM(int reg, int disp, int base_reg, OpndSize size) +{ + encoder_reg_mem(Mnemonic_CMP, size, reg, 0/*isPhysical*/, disp, base_reg, 0/*isBasePhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +//logical +void X86Assembler::AND_REG_TO_REG(int src, int dst) { + encoder_reg_reg(Mnemonic_AND, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::AND_IMM_TO_REG(int imm, int dst) { + encoder_imm_reg(Mnemonic_AND, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::OR_REG_TO_REG(int src, int dst) { + encoder_reg_reg(Mnemonic_OR, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::XOR(int src, int dst) { + encoder_reg_reg(Mnemonic_XOR, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::OR_IMM_TO_REG(int imm, int dst) { + encoder_imm_reg(Mnemonic_OR, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::NOT(int dst) { + encoder_reg(Mnemonic_NOT, OpndSize_32, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::NEG(int dst) { + encoder_reg(Mnemonic_NEG, OpndSize_32, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} +//shift +void X86Assembler::SHL(int imm, int dst) { + encoder_imm_reg(Mnemonic_SHL, OpndSize_32, imm, dst, 
0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::SHL(int imm, int disp, int dst) { + encoder_imm_mem(Mnemonic_SHL, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::SHR(int imm, int dst) { + encoder_imm_reg(Mnemonic_SHR, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::SHR(int imm, int disp, int dst) { + encoder_imm_mem(Mnemonic_SHR, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::SAR(int imm, int dst) { + encoder_imm_reg(Mnemonic_SAR, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::ROR(const int imm, int dst) { + encoder_imm_reg(Mnemonic_ROR, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::ROR(int imm, int disp, int dst) { + encoder_imm_mem(Mnemonic_ROR, OpndSize_32, imm, disp, dst, 0/*isBasePhysical*/, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} +//signed extension +void X86Assembler::MOVSX_MEM_TO_REG(OpndSize size, int base_reg, int disp, int dst) { + encoder_moves_mem_to_reg(size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::MOVSX_REG_TO_REG(OpndSize size, int src, int dst) { + encoder_moves_reg_to_reg(size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} +//zero entension +void X86Assembler::MOVZX_MEM_TO_REG(OpndSize size, int base_reg, int disp, int dst) { + encoder_movez_mem_to_reg(size, disp, base_reg, 
0/*isBasePhysical*/, dst, 0/*isPhysical*/, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::MOVZX_REG_TO_REG(OpndSize size, int src, int dst) { + encoder_movez_reg_to_reg(size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +// multiply... +// the first source operand is placed in EAX +void X86Assembler::IMUL(int reg) { + encoder_reg(Mnemonic_IMUL, OpndSize_32, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::IMUL(int src, int dst) { + encoder_reg_reg(Mnemonic_IMUL, OpndSize_32, src, 0/*isPhysical*/, dst/*dst is the destination*/, 0/*isPhysical2*/,LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::MUL(int reg) { + encoder_reg(Mnemonic_MUL, OpndSize_32, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + + +// data transfer... 
+void X86Assembler::MOV_IMM_TO_REG(int32_t imm, int dst) { + encoder_imm_reg(Mnemonic_MOV, OpndSize_32, imm, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::MOV_REG_TO_REG(int src, int dst, OpndSize size) +{ + if(src == dst) return; + encoder_reg_reg(Mnemonic_MOV, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::MOV_REG_TO_MEM(int reg, int disp, int base_reg, OpndSize size) +{ + encoder_reg_mem(Mnemonic_MOV, size, reg, 0/*isPhysical*/, disp, base_reg, 0/*isBasePhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::MOV_MEM_TO_REG(int disp, int base_reg, int reg, OpndSize size) +{ + encoder_mem_reg(Mnemonic_MOV, size, disp, base_reg, 0/*isBasePhysical*/, + reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} + +void X86Assembler::MOV_MEM_SCALE_TO_REG(int base_reg, int index_reg, int scale, int reg, OpndSize size) +{ + encoder_mem_scale_reg(Mnemonic_MOV, size, base_reg, 0/*isBasePhysical*/, index_reg, 0/*isIndexPhysical*/, scale, reg, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + mStream = mStream + encoder_get_inst_size(mStream); +} +// the conditional move +void X86Assembler::CMOV_REG_TO_REG(Mnemonic cc, int src, int dst, OpndSize size) +{ + switch (cc) { + case Mnemonic_CMOVO: + encoder_reg_reg(Mnemonic_CMOVO, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVNO: + encoder_reg_reg(Mnemonic_CMOVNO, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVB: + encoder_reg_reg(Mnemonic_CMOVB, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVNB: + encoder_reg_reg(Mnemonic_CMOVNB, size, src, 
0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVZ: + encoder_reg_reg(Mnemonic_CMOVZ, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVNZ: + encoder_reg_reg(Mnemonic_CMOVNZ, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVBE: + encoder_reg_reg(Mnemonic_CMOVBE, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVNBE: + encoder_reg_reg(Mnemonic_CMOVNBE, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVS: + encoder_reg_reg(Mnemonic_CMOVS, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVNS: + encoder_reg_reg(Mnemonic_CMOVNS, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVP: + encoder_reg_reg(Mnemonic_CMOVP, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVNP: + encoder_reg_reg(Mnemonic_CMOVNP, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVL: + encoder_reg_reg(Mnemonic_CMOVL, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVNL: + encoder_reg_reg(Mnemonic_CMOVNL, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVLE: + encoder_reg_reg(Mnemonic_CMOVLE, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVNLE: + encoder_reg_reg(Mnemonic_CMOVNLE, size, src, 0/*isPhysical*/, dst, 0/*isPhysical2*/, LowOpndRegType_gp, mStream); + break; + default : + printf("the condition is not supported.\n"); + return; + } + mStream = mStream + encoder_get_inst_size(mStream); +} + +void 
X86Assembler::CMOV_MEM_TO_REG(Mnemonic cc, int disp, int base_reg, int dst, OpndSize size) +{ + switch (cc) { + case Mnemonic_CMOVO: + encoder_mem_reg(Mnemonic_CMOVO, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVNO: + encoder_mem_reg(Mnemonic_CMOVNO, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVB: + encoder_mem_reg(Mnemonic_CMOVB, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVNB: + encoder_mem_reg(Mnemonic_CMOVNB, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVZ: + encoder_mem_reg(Mnemonic_CMOVZ, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVNZ: + encoder_mem_reg(Mnemonic_CMOVNZ, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVBE: + encoder_mem_reg(Mnemonic_CMOVBE, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVNBE: + encoder_mem_reg(Mnemonic_CMOVNBE, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVS: + encoder_mem_reg(Mnemonic_CMOVS, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVNS: + encoder_mem_reg(Mnemonic_CMOVNS, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVP: + encoder_mem_reg(Mnemonic_CMOVP, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVNP: + encoder_mem_reg(Mnemonic_CMOVNP, size, disp, base_reg, 0/*isBasePhysical*/, dst, 
0/*isPhysical*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVL: + encoder_mem_reg(Mnemonic_CMOVL, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVNL: + encoder_mem_reg(Mnemonic_CMOVNL, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVLE: + encoder_mem_reg(Mnemonic_CMOVLE, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + break; + case Mnemonic_CMOVNLE: + encoder_mem_reg(Mnemonic_CMOVNLE, size, disp, base_reg, 0/*isBasePhysical*/, dst, 0/*isPhysical*/, LowOpndRegType_gp, mStream); + break; + default : + printf("the condition is not supported.\n"); + return; + } + mStream = mStream + encoder_get_inst_size(mStream); +} + +}; // namespace android diff --git a/libpixelflinger/codeflinger/x86/X86Assembler.h b/libpixelflinger/codeflinger/x86/X86Assembler.h new file mode 100644 index 0000000..03502d5 --- /dev/null +++ b/libpixelflinger/codeflinger/x86/X86Assembler.h @@ -0,0 +1,163 @@ +/* libs/pixelflinger/codeflinger/x86/X86Assembler.h +** +** Copyright 2006, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. 
+*/ + +#ifndef ANDROID_X86ASSEMBLER_H +#define ANDROID_X86ASSEMBLER_H + +#include <stdint.h> +#include <sys/types.h> + +#include <utils/Vector.h> +#include <utils/KeyedVector.h> + +#include "codeflinger/tinyutils/smartpointer.h" +#include "codeflinger/CodeCache.h" +#include "enc_wrapper.h" + +namespace android { + +// ---------------------------------------------------------------------------- + +class X86Assembler +{ +public: + + enum { + EAX = PhysicalReg_EAX, EBX = PhysicalReg_EBX, ECX = PhysicalReg_ECX, + EDX = PhysicalReg_EDX, EDI = PhysicalReg_EDI, ESI = PhysicalReg_ESI, + ESP = PhysicalReg_ESP, EBP = PhysicalReg_EBP + }; + + X86Assembler(const sp<Assembly>& assembly); + ~X86Assembler(); + + char* base() const; + char* pc() const; + + + void disassemble(const char* name); + + // ------------------------------------------------------------------------ + // X86AssemblerInterface... + // ------------------------------------------------------------------------ + + void reset(); + + int generate(const char* name); + + void comment(const char* string); + + void label(const char* theLabel); + + void JCC(Mnemonic cc, const char* label); + + void JMP(const char* label); + + void prepare_esp(int old_offset); + + void update_esp(int new_offset); + + void shrink_esp(int shrink_offset); + + void callee_work(); + + void return_work(); + + char* pcForLabel(const char* label); + + void PUSH(int reg); + + void POP(int reg); + + void ADD_REG_TO_REG(int src, int dst); + void ADD_IMM_TO_REG(int imm, int dst); + void ADD_IMM_TO_MEM(int imm, int disp, int dst); + void ADD_MEM_TO_REG(int base_reg, int disp, int dst); + void ADD_REG_TO_MEM(int src, int base_reg, int disp); + void SUB_REG_TO_REG(int src, int dst); + void SUB_IMM_TO_REG(int imm, int dst); + void SUB_IMM_TO_MEM(int imm, int disp, int dst); + void SUB_REG_TO_MEM(int src, int base_reg, int disp); + + void TEST_REG_TO_REG(int src, int dst, OpndSize size=OpndSize_32); + void CMP_REG_TO_REG(int src, int dst, OpndSize 
size=OpndSize_32); + void CMP_MEM_TO_REG(int base_reg, int disp, int dst, OpndSize size=OpndSize_32); + void CMP_REG_TO_MEM(int reg, int disp, int base_reg, OpndSize size=OpndSize_32); + void CMP_IMM_TO_REG(int imm, int dst); + + void AND_REG_TO_REG(int src, int dst); + void AND_IMM_TO_REG(int imm, int dst); + void OR_REG_TO_REG(int src, int dst); + void XOR(int src, int dst); + void OR_IMM_TO_REG(int imm, int dst); + void NOT(int dst); + void NEG(int dst); + void SHL(int imm, int dst); + void SHL(int imm, int disp, int dst); + void SHR(int imm, int dst); + void SHR(int imm, int disp, int dst); + void SAR(int imm, int dst); + void ROR(const int imm, int dst); + void ROR(int imm, int disp, int dst); + void IMUL(int reg); + void IMUL(int src, int dst); + void MUL(int reg); + + void MOVSX_MEM_TO_REG(OpndSize size, int base_reg, int disp, int dst); + void MOVSX_REG_TO_REG(OpndSize size, int src, int dst); + void MOVZX_MEM_TO_REG(OpndSize size, int base_reg, int disp, int dst); + void MOVZX_REG_TO_REG(OpndSize size, int src, int dst); + void MOV_IMM_TO_REG(int32_t imm, int dst); + void MOV_REG_TO_REG(int src, int dst, OpndSize size=OpndSize_32); + void MOV_MEM_TO_REG(int disp, int base_reg, int reg, OpndSize size=OpndSize_32); + void MOV_REG_TO_MEM(int reg, int disp, int base_reg, OpndSize size=OpndSize_32); + void MOV_MEM_SCALE_TO_REG(int base_reg, int index_reg, int scale, int reg, OpndSize size=OpndSize_32); + void CMOV_REG_TO_REG(Mnemonic cc, int src, int dst, OpndSize size=OpndSize_32); + void CMOV_MEM_TO_REG(Mnemonic cc, int disp, int base_reg, int dst, OpndSize size=OpndSize_32); + + + sp<Assembly> mAssembly; + char* mBase; + char* mStream; + //branch target offset is relative to the next instruction + char* mStreamNext; + //updating esp after iterating the loop + char* mStreamUpdate; + + int64_t mDuration; +#if defined(WITH_LIB_HARDWARE) + bool mQemuTracing; +#endif + + struct branch_target_t { + inline branch_target_t() : label(0), pc(0), next_pc(0) { } + 
inline branch_target_t(const char* l, char* p, char* next_p) + : label(l), pc(p), next_pc(next_p) { } + const char* label; + char* pc; + char* next_pc; + }; + + Vector<branch_target_t> mBranchTargets; + KeyedVector< const char*, char* > mLabels; + KeyedVector< char*, const char* > mLabelsInverseMapping; + KeyedVector< char*, const char* > mComments; +}; + +}; // namespace android + +#endif //ANDROID_X86ASSEMBLER_H diff --git a/libpixelflinger/codeflinger/x86/blending.cpp b/libpixelflinger/codeflinger/x86/blending.cpp new file mode 100644 index 0000000..f918ffd --- /dev/null +++ b/libpixelflinger/codeflinger/x86/blending.cpp @@ -0,0 +1,974 @@ +/* libs/pixelflinger/codeflinger/x86/blending.cpp +** +** Copyright 2006, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. 
+*/ + +#include <assert.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/types.h> + +#include <cutils/log.h> + +#include "codeflinger/x86/GGLX86Assembler.h" + + +namespace android { + +void GGLX86Assembler::build_fog( + component_t& temp, // incomming fragment / output + int component, + Scratch& regs) +{ + if (mInfo[component].fog) { + Scratch scratches(registerFile()); + comment("fog"); + + temp.reg = scratches.obtain(); + MOV_MEM_TO_REG(temp.offset_ebp, EBP, temp.reg); + integer_t fragment(temp.reg, temp.h, temp.flags, temp.offset_ebp); + if (!(temp.flags & CORRUPTIBLE)) { + temp.reg = regs.obtain(); + temp.flags |= CORRUPTIBLE; + } + + integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE); + mBuilderContext.Rctx = scratches.obtain(); + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); + MOVZX_MEM_TO_REG(OpndSize_8, mBuilderContext.Rctx, GGL_OFFSETOF(state.fog.color[component]), fogColor.reg); + + integer_t factor(scratches.obtain(), 16, CORRUPTIBLE); + CONTEXT_LOAD(factor.reg, generated_vars.f); + scratches.recycle(mBuilderContext.Rctx); + + // clamp fog factor (TODO: see if there is a way to guarantee + // we won't overflow, when setting the iterators) + int temp_reg = scratches.obtain(); + MOV_REG_TO_REG(factor.reg, temp_reg); + SAR(31, temp_reg); + NOT(temp_reg); + AND_REG_TO_REG(temp_reg, factor.reg); + MOV_IMM_TO_REG(0x10000, temp_reg); + CMP_IMM_TO_REG(0x10000, factor.reg); + CMOV_REG_TO_REG(Mnemonic_CMOVAE, temp_reg, factor.reg); + scratches.recycle(temp_reg); + + //we will resue factor.reg + build_blendFOneMinusF(temp, factor, fragment, fogColor); + MOV_REG_TO_MEM(temp.reg, temp.offset_ebp, EBP); + scratches.recycle(temp.reg); + } +} + +void GGLX86Assembler::build_blending( + component_t& temp, // incomming fragment / output + pixel_t& pixel, // framebuffer + int component, + Scratch& regs) +{ + if (!mInfo[component].blend) + return; + + int fs = component==GGLFormat::ALPHA ? 
mBlendSrcA : mBlendSrc; + int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst; + if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) + fs = GGL_ONE; + const int blending = blending_codes(fs, fd); + if (!temp.size()) { + // here, blending will produce something which doesn't depend on + // that component (eg: GL_ZERO:GL_*), so the register has not been + // allocated yet. Will never be used as a source. + //temp = component_t(regs.obtain(), CORRUPTIBLE, temp_offset_ebp); + temp.reg = regs.obtain(); + temp.flags = CORRUPTIBLE; + temp.h = temp.l = 0; + } else { + temp.reg = regs.obtain(); + } + MOV_MEM_TO_REG(temp.offset_ebp, EBP, temp.reg); + // we are doing real blending... + // fb: extracted dst + // fragment: extracted src + // temp: component_t(fragment) and result + + // scoped register allocator + Scratch scratches(registerFile()); + comment("blending"); + + // we can optimize these cases a bit... + // (1) saturation is not needed + // (2) we can use only one multiply instead of 2 + // (3) we can reduce the register pressure + // R = S*f + D*(1-f) = (S-D)*f + D + // R = S*(1-f) + D*f = (D-S)*f + S + + const bool same_factor_opt1 = + (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) || + (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) || + (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) || + (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA); + + const bool same_factor_opt2 = + (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) || + (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) || + (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) || + (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA); + + + // XXX: we could also optimize these cases: + // R = S*f + D*f = (S+D)*f + // R = S*(1-f) + D*(1-f) = (S+D)*(1-f) + // R = S*D + D*S = 2*S*D + + + pixel.reg = scratches.obtain(); + MOV_MEM_TO_REG(pixel.offset_ebp, EBP, pixel.reg); + // see if we need to extract 'component' from the destination (fb) + integer_t fb; + if (blending & 
(BLEND_DST|FACTOR_DST)) { + fb.setTo(scratches.obtain(), 32); + extract(fb, pixel, component); + if (mDithering) { + // XXX: maybe what we should do instead, is simply + // expand fb -or- fragment to the larger of the two + if (fb.size() < temp.size()) { + // for now we expand 'fb' to min(fragment, 8) + int new_size = temp.size() < 8 ? temp.size() : 8; + expand(fb, fb, new_size); + } + } + } + + // convert input fragment to integer_t + if (temp.l && (temp.flags & CORRUPTIBLE)) { + SHR(temp.l, temp.reg); + temp.h -= temp.l; + temp.l = 0; + } + integer_t fragment(temp.reg, temp.size(), temp.flags, temp.offset_ebp); + + // if not done yet, convert input fragment to integer_t + if (temp.l) { + // here we know temp is not CORRUPTIBLE + fragment.reg = scratches.obtain(); + MOV_REG_TO_REG(temp.reg, fragment.reg); + SHR(temp.l, fragment.reg); + fragment.flags |= CORRUPTIBLE; + } + + if (!(temp.flags & CORRUPTIBLE)) { + // temp is not corruptible, but since it's the destination it + // will be modified, so we need to allocate a new register. 
+ temp.reg = regs.obtain(); + temp.flags &= ~CORRUPTIBLE; + fragment.flags &= ~CORRUPTIBLE; + } + + if ((blending & BLEND_SRC) && !same_factor_opt1) { + // source (fragment) is needed for the blending stage + // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1) + fragment.flags &= ~CORRUPTIBLE; + } + + + if (same_factor_opt1) { + // R = S*f + D*(1-f) = (S-D)*f + D + integer_t factor; + build_blend_factor(factor, fs, + component, pixel, fragment, fb, scratches); + // fb is always corruptible from this point + fb.flags |= CORRUPTIBLE; + //we will reuse factor in mul_factor_add of build_blendFOneMinusF, unless factor.reg == fragment.reg == temp.reg or factor.reg == fb.reg in build_blend_factor + if(factor.reg == fragment.reg || factor.reg == fb.reg) + MOV_REG_TO_REG(factor.reg, pixel.reg); + else + scratches.recycle(pixel.reg); + build_blendFOneMinusF(temp, factor, fragment, fb); + if(factor.reg == fragment.reg || factor.reg == fb.reg) { + MOV_REG_TO_REG(pixel.reg, factor.reg); + scratches.recycle(pixel.reg); + } + scratches.recycle(fb.reg); + //scratches.recycle(factor.reg); + } else if (same_factor_opt2) { + // R = S*(1-f) + D*f = (D-S)*f + S + integer_t factor; + // fb is always corrruptible here + fb.flags |= CORRUPTIBLE; + build_blend_factor(factor, fd, + component, pixel, fragment, fb, scratches); + //we will reuse factor in mul_factor_add of build_blendFOneMinusFF, unless factor.reg == fragment.reg == temp.reg or factor.reg == fb.reg in build_blend_factor + if(factor.reg == fragment.reg || factor.reg == fb.reg) + MOV_REG_TO_REG(factor.reg, pixel.reg); + else + scratches.recycle(pixel.reg); + build_blendOneMinusFF(temp, factor, fragment, fb); + if(factor.reg == fragment.reg || factor.reg == fb.reg) { + MOV_REG_TO_REG(pixel.reg, factor.reg); + scratches.recycle(pixel.reg); + } + scratches.recycle(fb.reg); + } else { + integer_t src_factor; + integer_t dst_factor; + + // if destination (fb) is not needed for the blending stage, + // then it can be 
marked as CORRUPTIBLE + if (!(blending & BLEND_DST)) { + fb.flags |= CORRUPTIBLE; + } + + // XXX: try to mark some registers as CORRUPTIBLE + // in most case we could make those corruptible + // when we're processing the last component + // but not always, for instance + // when fragment is constant and not reloaded + // when fb is needed for logic-ops or masking + // when a register is aliased (for instance with mAlphaSource) + + // blend away... + if (fs==GGL_ZERO) { + if (fd==GGL_ZERO) { // R = 0 + // already taken care of + } else if (fd==GGL_ONE) { // R = D + // already taken care of + } else { // R = D*fd + // compute fd + build_blend_factor(dst_factor, fd, + component, pixel, fragment, fb, scratches); + scratches.recycle(pixel.reg); + mul_factor(temp, fb, dst_factor, regs); + scratches.recycle(fb.reg); + } + } else if (fs==GGL_ONE) { + int temp_reg; + if (fd==GGL_ZERO) { // R = S + // NOP, taken care of + } else if (fd==GGL_ONE) { // R = S + D + component_add(temp, fb, fragment); // args order matters + temp_reg = scratches.obtain(); + component_sat(temp, temp_reg); + scratches.recycle(temp_reg); + } else { // R = S + D*fd + // compute fd + build_blend_factor(dst_factor, fd, + component, pixel, fragment, fb, scratches); + //we will probably change src_factor in mul_factor_add, unless factor.reg == fragment.reg == temp.reg or factor.reg == fb.reg in build_blend_factor + if(dst_factor.reg == fragment.reg || dst_factor.reg == fb.reg) + MOV_REG_TO_REG(dst_factor.reg, pixel.reg); + else + scratches.recycle(pixel.reg); + mul_factor_add(temp, fb, dst_factor, component_t(fragment)); + if(dst_factor.reg == fragment.reg || dst_factor.reg == fb.reg) { + MOV_REG_TO_REG(pixel.reg, dst_factor.reg); + scratches.recycle(pixel.reg); + } + temp_reg = fb.reg; + component_sat(temp, temp_reg); + scratches.recycle(fb.reg); + } + } else { + // compute fs + int temp_reg; + build_blend_factor(src_factor, fs, + component, pixel, fragment, fb, scratches); + if (fd==GGL_ZERO) { // R = 
S*fs + mul_factor(temp, fragment, src_factor, regs); + if (scratches.isUsed(src_factor.reg)) + scratches.recycle(src_factor.reg); + } else if (fd==GGL_ONE) { // R = S*fs + D + //we will probably change src_factor in mul_factor_add, unless factor.reg == fragment.reg == temp.reg or factor.reg == fb.reg in build_blend_factor + if(src_factor.reg == fragment.reg || src_factor.reg == fb.reg) + MOV_REG_TO_REG(src_factor.reg, pixel.reg); + else + scratches.recycle(pixel.reg); + mul_factor_add(temp, fragment, src_factor, component_t(fb)); + if(src_factor.reg == fragment.reg || src_factor.reg == fb.reg) { + MOV_REG_TO_REG(pixel.reg, src_factor.reg); + scratches.recycle(pixel.reg); + } + temp_reg = fb.reg; + component_sat(temp, temp_reg); + scratches.recycle(fb.reg); + } else { // R = S*fs + D*fd + mul_factor(temp, fragment, src_factor, regs); + if (scratches.isUsed(src_factor.reg)) + scratches.recycle(src_factor.reg); + // compute fd + build_blend_factor(dst_factor, fd, + component, pixel, fragment, fb, scratches); + //we will probably change dst_factor in mul_factor_add, unless factor.reg == fragment.reg == temp.reg or factor.reg == fb.reg + if(dst_factor.reg == fragment.reg || dst_factor.reg == fb.reg) + MOV_REG_TO_REG(dst_factor.reg, pixel.reg); + else + scratches.recycle(pixel.reg); + mul_factor_add(temp, fb, dst_factor, temp); + if(dst_factor.reg == fragment.reg || dst_factor.reg == fb.reg) { + MOV_REG_TO_REG(pixel.reg, dst_factor.reg); + scratches.recycle(pixel.reg); + } + if (!same_factor_opt1 && !same_factor_opt2) { + temp_reg = fb.reg; + component_sat(temp, temp_reg); + } + scratches.recycle(fb.reg); + } + if(scratches.isUsed(pixel.reg)) + scratches.recycle(pixel.reg); + } + } + // temp is modified, but it will be used immediately in downshift + //printf("temp.offset_ebp: %d \n", temp.offset_ebp); + //below will be triggered on CDK for surfaceflinger + if(temp.offset_ebp == mAlphaSource.offset_ebp) { + mCurSp = mCurSp - 4; + temp.offset_ebp = mCurSp; + } + // the r, 
g, b value must be stored, otherwise the color of globaltime is incorrect. + MOV_REG_TO_MEM(temp.reg, temp.offset_ebp, EBP); + regs.recycle(temp.reg); + + // now we can be corrupted (it's the dest) + temp.flags |= CORRUPTIBLE; +} + +void GGLX86Assembler::build_blend_factor( + integer_t& factor, int f, int component, + const pixel_t& dst_pixel, + integer_t& fragment, + integer_t& fb, + Scratch& scratches) +{ + integer_t src_alpha(fragment); + + // src_factor/dst_factor won't be used after blending, + // so it's fine to mark them as CORRUPTIBLE (if not aliased) + factor.flags |= CORRUPTIBLE; + int temp_reg; + switch(f) { + case GGL_ONE_MINUS_SRC_ALPHA: + case GGL_SRC_ALPHA: + if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) { + // we're processing alpha, so we already have + // src-alpha in fragment, and we need src-alpha just this time. + } else { + // alpha-src will be needed for other components + factor = mAlphaSource; + factor.flags &= ~CORRUPTIBLE; + factor.reg = scratches.obtain(); + //printf("mAlphaSource.offset_ebp: %d \n", mAlphaSource.offset_ebp); + //printf("fragment.offset_ebp: %d \n", fragment.offset_ebp); + //printf("factor.offset_ebp: %d \n", factor.offset_ebp); + MOV_MEM_TO_REG(mAlphaSource.offset_ebp, EBP, factor.reg); + if (!mBlendFactorCached || mBlendFactorCached==f) { + src_alpha = mAlphaSource; + // we already computed the blend factor before, nothing to do. + if (mBlendFactorCached) + return; + // this is the first time, make sure to compute the blend + // factor properly. + mBlendFactorCached = f; + break; + } else { + // we have a cached alpha blend factor, but we want another one, + // this should really not happen because by construction, + // we cannot have BOTH source and destination + // blend factors use ALPHA *and* ONE_MINUS_ALPHA (because + // the blending stage uses the f/(1-f) optimization + + // for completeness, we handle this case though. 
Since there + // are only 2 choices, this meens we want "the other one" + // (1-factor) + //factor = mAlphaSource; + //factor.flags &= ~CORRUPTIBLE; + NEG(factor.reg); + ADD_IMM_TO_REG((1<<factor.s), factor.reg); + MOV_REG_TO_MEM(factor.reg, factor.offset_ebp, EBP); + mBlendFactorCached = f; + return; + } + } + // fall-through... + case GGL_ONE_MINUS_DST_COLOR: + case GGL_DST_COLOR: + case GGL_ONE_MINUS_SRC_COLOR: + case GGL_SRC_COLOR: + case GGL_ONE_MINUS_DST_ALPHA: + case GGL_DST_ALPHA: + case GGL_SRC_ALPHA_SATURATE: + // help us find out what register we can use for the blend-factor + // CORRUPTIBLE registers are chosen first, or a new one is allocated. + if (fragment.flags & CORRUPTIBLE) { + factor.setTo(fragment.reg, 32, CORRUPTIBLE, fragment.offset_ebp); + fragment.flags &= ~CORRUPTIBLE; + } else if (fb.flags & CORRUPTIBLE) { + factor.setTo(fb.reg, 32, CORRUPTIBLE, fb.offset_ebp); + fb.flags &= ~CORRUPTIBLE; + } else { + factor.setTo(scratches.obtain(), 32, CORRUPTIBLE); + mCurSp = mCurSp - 4; + factor.offset_ebp = mCurSp; + } + break; + } + + // XXX: doesn't work if size==1 + + switch(f) { + case GGL_ONE_MINUS_DST_COLOR: + case GGL_DST_COLOR: + factor.s = fb.s; + MOV_REG_TO_REG(fb.reg, factor.reg); + SHR(fb.s-1, factor.reg); + ADD_REG_TO_REG(fb.reg, factor.reg); + break; + case GGL_ONE_MINUS_SRC_COLOR: + case GGL_SRC_COLOR: + factor.s = fragment.s; + temp_reg = scratches.obtain(); + MOV_REG_TO_REG(fragment.reg, temp_reg); + SHR(fragment.s-1, fragment.reg); + ADD_REG_TO_REG(temp_reg, fragment.reg); + scratches.recycle(temp_reg); + break; + case GGL_ONE_MINUS_SRC_ALPHA: + case GGL_SRC_ALPHA: + factor.s = src_alpha.s; + if (mBlendFactorCached == f) { + //src_alpha == factor == mAlphaSource, we need a temp reg + if(scratches.countFreeRegs()) { + temp_reg = scratches.obtain(); + MOV_REG_TO_REG(factor.reg, temp_reg); + SHR(src_alpha.s-1, factor.reg); + ADD_REG_TO_REG(temp_reg, factor.reg); + scratches.recycle(temp_reg); + } + else { + SHR(src_alpha.s-1, 
factor.offset_ebp, EBP); + ADD_MEM_TO_REG(EBP, factor.offset_ebp, factor.reg); + } + } + else + { + MOV_REG_TO_REG(src_alpha.reg, factor.reg); + SHR(src_alpha.s-1, factor.reg); + ADD_REG_TO_REG(src_alpha.reg, factor.reg); + } + // we will store factor in the next switch for GGL_ONE_MINUS_SRC_ALPHA + if(f == GGL_SRC_ALPHA) + MOV_REG_TO_MEM(factor.reg, factor.offset_ebp, EBP); + break; + case GGL_ONE_MINUS_DST_ALPHA: + case GGL_DST_ALPHA: + // XXX: should be precomputed + extract(factor, dst_pixel, GGLFormat::ALPHA); + temp_reg = scratches.obtain(); + MOV_REG_TO_REG(factor.reg, temp_reg); + SHR(factor.s-1, factor.reg); + ADD_REG_TO_REG(temp_reg, factor.reg); + scratches.recycle(temp_reg); + break; + case GGL_SRC_ALPHA_SATURATE: + // XXX: should be precomputed + // XXX: f = min(As, 1-Ad) + // btw, we're guaranteed that Ad's size is <= 8, because + // it's extracted from the framebuffer + break; + } + + switch(f) { + case GGL_ONE_MINUS_DST_COLOR: + case GGL_ONE_MINUS_SRC_COLOR: + case GGL_ONE_MINUS_DST_ALPHA: + case GGL_ONE_MINUS_SRC_ALPHA: + NEG(factor.reg); + ADD_IMM_TO_REG(1<<factor.s, factor.reg); + MOV_REG_TO_MEM(factor.reg, factor.offset_ebp, EBP); + } + + // don't need more than 8-bits for the blend factor + // and this will prevent overflows in the multiplies later + if (factor.s > 8) { + SHR(factor.s-8, factor.reg); + factor.s = 8; + if(f == GGL_ONE_MINUS_SRC_ALPHA || f == GGL_SRC_ALPHA) + MOV_REG_TO_MEM(factor.reg, factor.offset_ebp, EBP); + } + //below will be triggered on CDK for surfaceflinger + if(fragment.offset_ebp == mAlphaSource.offset_ebp) + MOV_REG_TO_REG(factor.reg, fragment.reg); +} + +int GGLX86Assembler::blending_codes(int fs, int fd) +{ + int blending = 0; + switch(fs) { + case GGL_ONE: + blending |= BLEND_SRC; + break; + + case GGL_ONE_MINUS_DST_COLOR: + case GGL_DST_COLOR: + blending |= FACTOR_DST|BLEND_SRC; + break; + case GGL_ONE_MINUS_DST_ALPHA: + case GGL_DST_ALPHA: + // no need to extract 'component' from the destination + // for the 
blend factor, because we need ALPHA only. + blending |= BLEND_SRC; + break; + + case GGL_ONE_MINUS_SRC_COLOR: + case GGL_SRC_COLOR: + blending |= FACTOR_SRC|BLEND_SRC; + break; + case GGL_ONE_MINUS_SRC_ALPHA: + case GGL_SRC_ALPHA: + case GGL_SRC_ALPHA_SATURATE: + blending |= FACTOR_SRC|BLEND_SRC; + break; + } + switch(fd) { + case GGL_ONE: + blending |= BLEND_DST; + break; + + case GGL_ONE_MINUS_DST_COLOR: + case GGL_DST_COLOR: + blending |= FACTOR_DST|BLEND_DST; + break; + case GGL_ONE_MINUS_DST_ALPHA: + case GGL_DST_ALPHA: + blending |= FACTOR_DST|BLEND_DST; + break; + + case GGL_ONE_MINUS_SRC_COLOR: + case GGL_SRC_COLOR: + blending |= FACTOR_SRC|BLEND_DST; + break; + case GGL_ONE_MINUS_SRC_ALPHA: + case GGL_SRC_ALPHA: + // no need to extract 'component' from the source + // for the blend factor, because we need ALPHA only. + blending |= BLEND_DST; + break; + } + return blending; +} + +// --------------------------------------------------------------------------- + +void GGLX86Assembler::build_blendFOneMinusF( + component_t& temp, + const integer_t& factor, + const integer_t& fragment, + const integer_t& fb) +{ + // R = S*f + D*(1-f) = (S-D)*f + D + // compute S-D + Scratch scratches(registerFile()); + integer_t diff(fragment.flags & CORRUPTIBLE ? 
+ fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE); + const int shift = fragment.size() - fb.size(); + if (shift>0) { + MOV_REG_TO_REG(fragment.reg, diff.reg); + SHR(shift, diff.reg); + SUB_REG_TO_REG(fb.reg, diff.reg); + } else if (shift<0) { + MOV_REG_TO_REG(fragment.reg, diff.reg); + SHL(-shift, diff.reg); + SUB_REG_TO_REG(fb.reg, diff.reg); + } else { + MOV_REG_TO_REG(fragment.reg, diff.reg); + SUB_REG_TO_REG(fb.reg, diff.reg); + } + mul_factor_add(temp, diff, factor, component_t(fb)); + if(!(fragment.flags & CORRUPTIBLE)) + scratches.recycle(diff.reg); +} + +void GGLX86Assembler::build_blendOneMinusFF( + component_t& temp, + const integer_t& factor, + const integer_t& fragment, + const integer_t& fb) +{ + // R = S*f + D*(1-f) = (S-D)*f + D + Scratch scratches(registerFile()); + // compute D-S + integer_t diff(fb.flags & CORRUPTIBLE ? + fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE); + const int shift = fragment.size() - fb.size(); + if (shift>0) { + SHR(shift, fragment.reg); + MOV_REG_TO_REG(fb.reg, diff.reg); + SUB_REG_TO_REG(fragment.reg, diff.reg); + } + else if (shift<0) { + SHR(-shift, fragment.reg); + MOV_REG_TO_REG(fb.reg, diff.reg); + SUB_REG_TO_REG(fragment.reg, diff.reg); + } + else { + MOV_REG_TO_REG(fb.reg, diff.reg); + SUB_REG_TO_REG(fragment.reg, diff.reg); + } + + mul_factor_add(temp, diff, factor, component_t(fragment)); + if(!(fragment.flags & CORRUPTIBLE)) + scratches.recycle(diff.reg); +} + +// --------------------------------------------------------------------------- + +void GGLX86Assembler::mul_factor( component_t& d, + const integer_t& v, + const integer_t& f, Scratch& scratches) +{ + // f can be changed + // + int vs = v.size(); + int fs = f.size(); + int ms = vs+fs; + + // XXX: we could have special cases for 1 bit mul + + // all this code below to use the best multiply instruction + // wrt the parameters size. 
We take advantage of the fact + // that the 16-bits multiplies allow a 16-bit shift + // The trick is that we just make sure that we have at least 8-bits + // per component (which is enough for a 8 bits display). + + int xy = -1; + int vshift = 0; + int fshift = 0; + int smulw = 0; + + int xyBB = 0; + int xyTB = 1; + int xyTT = 2; + int xyBT = 3; + if (vs<16) { + if (fs<16) { + xy = xyBB; + } else if (GGL_BETWEEN(fs, 24, 31)) { + ms -= 16; + xy = xyTB; + } else { + // eg: 15 * 18 -> 15 * 15 + fshift = fs - 15; + ms -= fshift; + xy = xyBB; + } + } else if (GGL_BETWEEN(vs, 24, 31)) { + if (fs<16) { + ms -= 16; + xy = xyTB; + } else if (GGL_BETWEEN(fs, 24, 31)) { + ms -= 32; + xy = xyTT; + } else { + // eg: 24 * 18 -> 8 * 18 + fshift = fs - 15; + ms -= 16 + fshift; + xy = xyTB; + } + } else { + if (fs<16) { + // eg: 18 * 15 -> 15 * 15 + vshift = vs - 15; + ms -= vshift; + xy = xyBB; + } else if (GGL_BETWEEN(fs, 24, 31)) { + // eg: 18 * 24 -> 15 * 8 + vshift = vs - 15; + ms -= 16 + vshift; + xy = xyBT; + } else { + // eg: 18 * 18 -> (15 * 18)>>16 + fshift = fs - 15; + ms -= 16 + fshift; + //xy = yB; //XXX SMULWB + smulw = 1; + } + } + + ALOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs); + + int vreg = v.reg; + int freg = f.reg; + if (vshift) { + MOV_REG_TO_REG(vreg, d.reg); + SHR(vshift, d.reg); + vreg = d.reg; + } + if (fshift) { + MOV_REG_TO_REG(vreg, d.reg); + SHR(fshift, d.reg); + freg = d.reg; + } + MOV_REG_TO_REG(vreg, d.reg); + if (smulw) { + int flag_push_edx = 0; + int flag_reserve_edx = 0; + int temp_reg2 = -1; + int edx_offset_ebp = 0; + if(scratches.isUsed(EDX) == 1) { + if(d.reg != EDX) { + flag_push_edx = 1; + mCurSp = mCurSp - 4; + edx_offset_ebp = mCurSp; + MOV_REG_TO_MEM(EDX, edx_offset_ebp, EBP); + //PUSH(EDX); + } + } + else { + flag_reserve_edx = 1; + scratches.reserve(EDX); + } + if(scratches.isUsed(EAX)) { + if( freg == EAX || d.reg == EAX) { + MOVSX_REG_TO_REG(OpndSize_16, freg, freg); + if(freg == EAX) + IMUL(d.reg); + else + 
IMUL(freg); + SHL(16, EDX); + SHR(16, EAX); + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); + MOV_REG_TO_REG(EDX, d.reg); + } + else { + int eax_offset_ebp = 0; + if(scratches.countFreeRegs() > 0) { + temp_reg2 = scratches.obtain(); + MOV_REG_TO_REG(EAX, temp_reg2); + } + else { + mCurSp = mCurSp - 4; + eax_offset_ebp = mCurSp; + MOV_REG_TO_MEM(EAX, eax_offset_ebp, EBP); + //PUSH(EAX); + } + MOV_REG_TO_REG(freg, EAX); + MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX); + IMUL(d.reg); + SHL(16, EDX); + SHR(16, EAX); + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); + MOV_REG_TO_REG(EDX, d.reg); + if(temp_reg2 > -1) { + MOV_REG_TO_REG(temp_reg2, EAX); + scratches.recycle(temp_reg2); + } + else { + MOV_MEM_TO_REG(eax_offset_ebp, EBP, EAX); + //POP(EAX); + } + } + } + else { + MOV_REG_TO_REG(freg, EAX); + MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX); + IMUL(d.reg); + SHL(16, EDX); + SHR(16, EAX); + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); + MOV_REG_TO_REG(EDX, d.reg); + } + if(flag_push_edx == 1) { + MOV_MEM_TO_REG(edx_offset_ebp, EBP, EDX); + //POP(EDX); + } + if(flag_reserve_edx ==1) + scratches.recycle(EDX); + } + else { + if(xy == xyBB) { + MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg); + MOVSX_REG_TO_REG(OpndSize_16, freg, freg); + IMUL(freg, d.reg); + } + else if(xy == xyTB) { + SHR(16, d.reg); + MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg); + MOVSX_REG_TO_REG(OpndSize_16, freg, freg); + IMUL(freg, d.reg); + } + else if(xy == xyBT) { + MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg); + SHR(16, freg); + MOVSX_REG_TO_REG(OpndSize_16, freg, freg); + IMUL(freg, d.reg); + } + else if(xy == xyTT) { + SHR(16, d.reg); + MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg); + SHR(16, freg); + MOVSX_REG_TO_REG(OpndSize_16, freg, freg); + IMUL(freg, d.reg); + } + } + + + d.h = ms; + if (mDithering) { + d.l = 0; + } else { + d.l = fs; + d.flags |= CLEAR_LO; + } +} + +void GGLX86Assembler::mul_factor_add( component_t& d, + const integer_t& v, + const integer_t& f, + const component_t& a) +{ + // XXX: we could have special 
cases for 1 bit mul + Scratch scratches(registerFile()); + + int vs = v.size(); + int fs = f.size(); + int as = a.h; + int ms = vs+fs; + + ALOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as); + + integer_t add(a.reg, a.h, a.flags, a.offset_ebp); + + + // 'a' is a component_t but it is guaranteed to have + // its high bits set to 0. However in the dithering case, + // we can't get away with truncating the potentially bad bits + // so extraction is needed. + + if ((mDithering) && (a.size() < ms)) { + // we need to expand a + if (!(a.flags & CORRUPTIBLE)) { + // ... but it's not corruptible, so we need to pick a + // temporary register. + // Try to uses the destination register first (it's likely + // to be usable, unless it aliases an input). + if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) { + add.reg = d.reg; + } else { + add.reg = scratches.obtain(); + } + } + expand(add, a, ms); // extracts and expands + as = ms; + } + + if (ms == as) { + MOV_REG_TO_REG(v.reg, d.reg); + if (vs<16 && fs<16) { + MOVSX_REG_TO_REG(OpndSize_16, d.reg, d.reg); + MOVSX_REG_TO_REG(OpndSize_16, f.reg, f.reg); + IMUL(f.reg, d.reg); + } + else + IMUL(f.reg, d.reg); + ADD_REG_TO_REG(add.reg, d.reg); + } else { + //int temp = d.reg; + //if (temp == add.reg) { + // // the mul will modify add.reg, we need an intermediary reg + // if (v.flags & CORRUPTIBLE) temp = v.reg; + // else if (f.flags & CORRUPTIBLE) temp = f.reg; + // else temp = scratches.obtain(); + //} + + // below d.reg may override "temp" result, so we use a new register + int temp_reg; + int v_offset_ebp = 0; + if(scratches.countFreeRegs() == 0) { + temp_reg = v.reg; + mCurSp = mCurSp - 4; + v_offset_ebp = mCurSp; + MOV_REG_TO_MEM(v.reg, v_offset_ebp, EBP); + } + else { + temp_reg = scratches.obtain(); + MOV_REG_TO_REG(v.reg, temp_reg); + } + if (vs<16 && fs<16) { + MOVSX_REG_TO_REG(OpndSize_16, temp_reg, temp_reg); + MOVSX_REG_TO_REG(OpndSize_16, f.reg, f.reg); + IMUL(f.reg, temp_reg); + } + else + 
IMUL(f.reg, temp_reg); + + if (ms>as) { + MOV_REG_TO_REG(add.reg, d.reg); + SHL(ms-as, d.reg); + ADD_REG_TO_REG(temp_reg, d.reg); + } else if (ms<as) { + // not sure if we should expand the mul instead? + MOV_REG_TO_REG(add.reg, d.reg); + SHL(as-ms, d.reg); + ADD_REG_TO_REG(temp_reg, d.reg); + } + if(temp_reg == v.reg) + MOV_MEM_TO_REG(v_offset_ebp, EBP, v.reg); + else + scratches.recycle(temp_reg); + } + + d.h = ms; + if (mDithering) { + d.l = a.l; + } else { + d.l = fs>a.l ? fs : a.l; + d.flags |= CLEAR_LO; + } +} + +void GGLX86Assembler::component_add(component_t& d, + const integer_t& dst, const integer_t& src) +{ + // here we're guaranteed that fragment.size() >= fb.size() + const int shift = src.size() - dst.size(); + if (!shift) { + MOV_REG_TO_REG(src.reg, d.reg); + ADD_REG_TO_REG(dst.reg, d.reg); + } else { + MOV_REG_TO_REG(dst.reg, d.reg); + SHL(shift, d.reg); + ADD_REG_TO_REG(src.reg, d.reg); + } + + d.h = src.size(); + if (mDithering) { + d.l = 0; + } else { + d.l = shift; + d.flags |= CLEAR_LO; + } +} + +void GGLX86Assembler::component_sat(const component_t& v, const int temp_reg) +{ + const int32_t one = ((1<<v.size())-1)<<v.l; + MOV_IMM_TO_REG(one, temp_reg); + CMP_IMM_TO_REG(1<<v.h, v.reg); + CMOV_REG_TO_REG(Mnemonic_CMOVAE, temp_reg, v.reg); +} + +// ---------------------------------------------------------------------------- + +}; // namespace android diff --git a/libpixelflinger/codeflinger/x86/libenc/Android.mk b/libpixelflinger/codeflinger/x86/libenc/Android.mk new file mode 100644 index 0000000..445de06 --- /dev/null +++ b/libpixelflinger/codeflinger/x86/libenc/Android.mk @@ -0,0 +1,30 @@ +# +# Copyright (C) 2015 The Android-x86 Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +LOCAL_PATH := $(call my-dir) + +enc_src_files := \ + dec_base.cpp \ + enc_base.cpp \ + enc_tabl.cpp \ + enc_wrapper.cpp + +include $(CLEAR_VARS) +LOCAL_SRC_FILES := $(enc_src_files) +LOCAL_MODULE := libenc +LOCAL_MODULE_TAGS := optional +LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH) +include $(BUILD_STATIC_LIBRARY) diff --git a/libpixelflinger/codeflinger/x86/libenc/README.txt b/libpixelflinger/codeflinger/x86/libenc/README.txt new file mode 100644 index 0000000..a2e73ec --- /dev/null +++ b/libpixelflinger/codeflinger/x86/libenc/README.txt @@ -0,0 +1,21 @@ +Original source from Apache Harmony 5.0M15 (r991518 from 2010-09-01) at +http://harmony.apache.org/. + +The following files are from drlvm/vm/port/src/encoder/ia32_em64t. + + dec_base.cpp + dec_base.h + enc_base.cpp + enc_base.h + enc_defs.h + enc_prvt.h + enc_tabl.cpp + encoder.cpp + encoder.h + encoder.inl + +The following files are derived partially from the original Apache +Harmony files. + + enc_defs_ext.h -- derived from enc_defs.h + enc_wrapper.h -- derived from encoder.h diff --git a/libpixelflinger/codeflinger/x86/libenc/dec_base.cpp b/libpixelflinger/codeflinger/x86/libenc/dec_base.cpp new file mode 100644 index 0000000..ea85d10 --- /dev/null +++ b/libpixelflinger/codeflinger/x86/libenc/dec_base.cpp @@ -0,0 +1,541 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @author Alexander V. Astapchuk + */ + +/** + * @file + * @brief Main decoding (disassembling) routines implementation. + */ + +#include "dec_base.h" +#include "enc_prvt.h" +#include <stdio.h> +//#include "open/common.h" + +bool DecoderBase::is_prefix(const unsigned char * bytes) +{ + unsigned char b0 = *bytes; + unsigned char b1 = *(bytes+1); + if (b0 == 0xF0) { // LOCK + return true; + } + if (b0==0xF2 || b0==0xF3) { // REPNZ/REPZ prefixes + if (b1 == 0x0F) { // .... but may be a part of SIMD opcode + return false; + } + return true; + } + if (b0 == 0x2E || b0 == 0x36 || b0==0x3E || b0==0x26 || b0==0x64 || b0==0x3E) { + // branch hints, segment prefixes + return true; + } + if (b0==0x66) { // operand-size prefix + if (b1 == 0x0F) { // .... 
but may be a part of SIMD opcode + return false; + } + return false; //XXX - currently considered as part of opcode//true; + } + if (b0==0x67) { // address size prefix + return true; + } + return false; +} + +// Returns prefix count from 0 to 4, or ((unsigned int)-1) on error +unsigned int DecoderBase::fill_prefs(const unsigned char * bytes, Inst * pinst) +{ + const unsigned char * my_bytes = bytes; + + while( 1 ) + { + unsigned char by1 = *my_bytes; + unsigned char by2 = *(my_bytes + 1); + Inst::PrefGroups where; + + switch( by1 ) + { + case InstPrefix_REPNE: + case InstPrefix_REP: + { + if( 0x0F == by2) + { + return pinst->prefc; + } + } + case InstPrefix_LOCK: + { + where = Inst::Group1; + break; + } + case InstPrefix_CS: + case InstPrefix_SS: + case InstPrefix_DS: + case InstPrefix_ES: + case InstPrefix_FS: + case InstPrefix_GS: +// case InstPrefix_HintTaken: the same as CS override +// case InstPrefix_HintNotTaken: the same as DS override + { + where = Inst::Group2; + break; + } + case InstPrefix_OpndSize: + { +//NOTE: prefix does not work for JMP Sz16, the opcode is 0x66 0xe9 +// here 0x66 will be treated as prefix, try_mn will try to match the code starting at 0xe9 +// it will match JMP Sz32 ... +//HACK: assume it is the last prefix, return any way + if( 0x0F == by2) + { + return pinst->prefc; + } + return pinst->prefc; + where = Inst::Group3; + break; + } + case InstPrefix_AddrSize: + { + where = Inst::Group4; + break; + } + default: + { + return pinst->prefc; + } + } + // Assertions are not allowed here. 
+ // Error situations should result in returning error status + if (InstPrefix_Null != pinst->pref[where]) //only one prefix in each group + return (unsigned int)-1; + + pinst->pref[where] = (InstPrefix)by1; + + if (pinst->prefc >= 4) //no more than 4 prefixes + return (unsigned int)-1; + + pinst->prefc++; + ++my_bytes; + } +} + + + +unsigned DecoderBase::decode(const void * addr, Inst * pinst) +{ + Inst tmp; + + //assert( *(unsigned char*)addr != 0x66); + + const unsigned char * bytes = (unsigned char*)addr; + + // Load up to 4 prefixes + // for each Mnemonic + unsigned int pref_count = fill_prefs(bytes, &tmp); + + if (pref_count == (unsigned int)-1) // Wrong prefix sequence, or >4 prefixes + return 0; // Error + + bytes += pref_count; + + // for each opcodedesc + // if (raw_len == 0) memcmp(, raw_len) + // else check the mixed state which is one of the following: + // /digit /i /rw /rd /rb + + bool found = false; + const unsigned char * saveBytes = bytes; + for (unsigned mn=1; mn<Mnemonic_Count; mn++) { + bytes = saveBytes; + found=try_mn((Mnemonic)mn, &bytes, &tmp); + if (found) { + tmp.mn = (Mnemonic)mn; + break; + } + } + if (!found) { + // Unknown opcode + return 0; + } + tmp.size = (unsigned)(bytes-(const unsigned char*)addr); + if (pinst) { + *pinst = tmp; + } + return tmp.size; +} + +#ifdef _EM64T_ +#define EXTEND_REG(reg, flag) \ + ((NULL == rex || 0 == rex->flag) ? 
reg : (reg + 8)) +#else +#define EXTEND_REG(reg, flag) (reg) +#endif + +//don't know the use of rex, seems not used when _EM64T_ is not enabled +bool DecoderBase::decode_aux(const EncoderBase::OpcodeDesc& odesc, unsigned aux, + const unsigned char ** pbuf, Inst * pinst +#ifdef _EM64T_ + , const Rex UNREF *rex +#endif + ) +{ + OpcodeByteKind kind = (OpcodeByteKind)(aux & OpcodeByteKind_KindMask); + unsigned byte = (aux & OpcodeByteKind_OpcodeMask); + unsigned data_byte = **pbuf; + EncoderBase::Operand& opnd = pinst->operands[pinst->argc]; + const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc]; + + switch (kind) { + case OpcodeByteKind_SlashR: + { + RegName reg; + OpndKind okind; + const ModRM& modrm = *(ModRM*)*pbuf; + if (opndDesc.kind & OpndKind_Mem) { // 1st operand is memory +#ifdef _EM64T_ + decodeModRM(odesc, pbuf, pinst, rex); +#else + decodeModRM(odesc, pbuf, pinst); +#endif + ++pinst->argc; + const EncoderBase::OpndDesc& opndDesc2 = odesc.opnds[pinst->argc]; + okind = ((opndDesc2.kind & OpndKind_XMMReg) || opndDesc2.size==OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg; + EncoderBase::Operand& regOpnd = pinst->operands[pinst->argc]; + reg = getRegName(okind, opndDesc2.size, EXTEND_REG(modrm.reg, r)); + regOpnd = EncoderBase::Operand(reg); + } else { // 2nd operand is memory + okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size==OpndSize_64) ? 
OpndKind_XMMReg : OpndKind_GPReg; + EncoderBase::Operand& regOpnd = pinst->operands[pinst->argc]; + reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.reg, r)); + regOpnd = EncoderBase::Operand(reg); + ++pinst->argc; +#ifdef _EM64T_ + decodeModRM(odesc, pbuf, pinst, rex); +#else + decodeModRM(odesc, pbuf, pinst); +#endif + } + ++pinst->argc; + } + return true; + case OpcodeByteKind_rb: + case OpcodeByteKind_rw: + case OpcodeByteKind_rd: + { + // Gregory - + // Here we don't parse register because for current needs + // disassembler doesn't require to parse all operands + unsigned regid = data_byte - byte; + if (regid>7) { + return false; + } + OpndSize opnd_size; + switch(kind) + { + case OpcodeByteKind_rb: + { + opnd_size = OpndSize_8; + break; + } + case OpcodeByteKind_rw: + { + opnd_size = OpndSize_16; + break; + } + case OpcodeByteKind_rd: + { + opnd_size = OpndSize_32; + break; + } + default: + opnd_size = OpndSize_32; // so there is no compiler warning + assert( false ); + } + opnd = EncoderBase::Operand( getRegName(OpndKind_GPReg, opnd_size, regid) ); + + ++pinst->argc; + ++*pbuf; + return true; + } + case OpcodeByteKind_cb: + { + char offset = *(char*)*pbuf; + *pbuf += 1; + opnd = EncoderBase::Operand(offset); + ++pinst->argc; + //pinst->direct_addr = (void*)(pinst->offset + *pbuf); + } + return true; + case OpcodeByteKind_cw: + // not an error, but not expected in current env + // Android x86 + { + short offset = *(short*)*pbuf; + *pbuf += 2; + opnd = EncoderBase::Operand(offset); + ++pinst->argc; + } + return true; + //return false; + case OpcodeByteKind_cd: + { + int offset = *(int*)*pbuf; + *pbuf += 4; + opnd = EncoderBase::Operand(offset); + ++pinst->argc; + } + return true; + case OpcodeByteKind_SlashNum: + { + const ModRM& modrm = *(ModRM*)*pbuf; + if (modrm.reg != byte) { + return false; + } + decodeModRM(odesc, pbuf, pinst +#ifdef _EM64T_ + , rex +#endif + ); + ++pinst->argc; + } + return true; + case OpcodeByteKind_ib: + { + char ival = 
*(char*)*pbuf; + opnd = EncoderBase::Operand(ival); + ++pinst->argc; + *pbuf += 1; + } + return true; + case OpcodeByteKind_iw: + { + short ival = *(short*)*pbuf; + opnd = EncoderBase::Operand(ival); + ++pinst->argc; + *pbuf += 2; + } + return true; + case OpcodeByteKind_id: + { + int ival = *(int*)*pbuf; + opnd = EncoderBase::Operand(ival); + ++pinst->argc; + *pbuf += 4; + } + return true; +#ifdef _EM64T_ + case OpcodeByteKind_io: + { + long long int ival = *(long long int*)*pbuf; + opnd = EncoderBase::Operand(OpndSize_64, ival); + ++pinst->argc; + *pbuf += 8; + } + return true; +#endif + case OpcodeByteKind_plus_i: + { + unsigned regid = data_byte - byte; + if (regid>7) { + return false; + } + ++*pbuf; + return true; + } + case OpcodeByteKind_ZeroOpcodeByte: // cant be here + return false; + default: + // unknown kind ? how comes ? + break; + } + return false; +} + +bool DecoderBase::try_mn(Mnemonic mn, const unsigned char ** pbuf, Inst * pinst) { + const unsigned char * save_pbuf = *pbuf; + EncoderBase::OpcodeDesc * opcodes = EncoderBase::opcodes[mn]; + + for (unsigned i=0; !opcodes[i].last; i++) { + const EncoderBase::OpcodeDesc& odesc = opcodes[i]; + char *opcode_ptr = const_cast<char *>(odesc.opcode); + int opcode_len = odesc.opcode_len; +#ifdef _EM64T_ + Rex *prex = NULL; + Rex rex; +#endif + + *pbuf = save_pbuf; +#ifdef _EM64T_ + // Match REX prefixes + unsigned char rex_byte = (*pbuf)[0]; + if ((rex_byte & 0xf0) == 0x40) + { + if ((rex_byte & 0x08) != 0) + { + // Have REX.W + if (opcode_len > 0 && opcode_ptr[0] == 0x48) + { + // Have REX.W in opcode. All mnemonics that allow + // REX.W have to have specified it in opcode, + // otherwise it is not allowed + rex = *(Rex *)*pbuf; + prex = &rex; + (*pbuf)++; + opcode_ptr++; + opcode_len--; + } + } + else + { + // No REX.W, so it doesn't have to be in opcode. 
We + // have REX.B, REX.X, REX.R or their combination, but + // not in opcode, they may extend any part of the + // instruction + rex = *(Rex *)*pbuf; + prex = &rex; + (*pbuf)++; + } + } +#endif + if (opcode_len != 0) { + if (memcmp(*pbuf, opcode_ptr, opcode_len)) { + continue; + } + *pbuf += opcode_len; + } + if (odesc.aux0 != 0) { + + if (!decode_aux(odesc, odesc.aux0, pbuf, pinst +#ifdef _EM64T_ + , prex +#endif + )) { + continue; + } + if (odesc.aux1 != 0) { + if (!decode_aux(odesc, odesc.aux1, pbuf, pinst +#ifdef _EM64T_ + , prex +#endif + )) { + continue; + } + } + pinst->odesc = &opcodes[i]; + return true; + } + else { + // Can't have empty opcode + assert(opcode_len != 0); + pinst->odesc = &opcodes[i]; + return true; + } + } + return false; +} + +bool DecoderBase::decodeModRM(const EncoderBase::OpcodeDesc& odesc, + const unsigned char ** pbuf, Inst * pinst +#ifdef _EM64T_ + , const Rex *rex +#endif + ) +{ + EncoderBase::Operand& opnd = pinst->operands[pinst->argc]; + const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc]; + + //XXX debug ///assert(0x66 != *(*pbuf-2)); + const ModRM& modrm = *(ModRM*)*pbuf; + *pbuf += 1; + + RegName base = RegName_Null; + RegName index = RegName_Null; + int disp = 0; + unsigned scale = 0; + + // On x86_64 all mnemonics that allow REX.W have REX.W in opcode. + // Therefore REX.W is simply ignored, and opndDesc.size is used + + if (modrm.mod == 3) { + // we have only modrm. no sib, no disp. + // Android x86: Use XMMReg for 64b operand. + OpndKind okind = ((opndDesc.kind & OpndKind_XMMReg) || opndDesc.size == OpndSize_64) ? OpndKind_XMMReg : OpndKind_GPReg; + RegName reg = getRegName(okind, opndDesc.size, EXTEND_REG(modrm.rm, b)); + opnd = EncoderBase::Operand(reg); + return true; + } + //Android x86: m16, m32, m64: mean a byte[word|doubleword] operand in memory + //base and index should be 32 bits!!! 
+ const SIB& sib = *(SIB*)*pbuf; + // check whether we have a sib + if (modrm.rm == 4) { + // yes, we have SIB + *pbuf += 1; + if (sib.index != 4) { + index = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.index, x)); //Android x86: OpndDesc.size + } else { + // (sib.index == 4) => no index + //%esp can't be sib.index + } + + // scale = sib.scale == 0 ? 0 : (1<<sib.scale); + // scale = (1<<sib.scale); + scale = (index == RegName_Null) ? 0 : (1<<sib.scale); + + if (sib.base != 5 || modrm.mod != 0) { + base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(sib.base, b)); //Android x86: OpndDesc.size + } else { + // (sib.base == 5 && modrm.mod == 0) => no base + } + } + else { + if (modrm.mod != 0 || modrm.rm != 5) { + base = getRegName(OpndKind_GPReg, OpndSize_32, EXTEND_REG(modrm.rm, b)); //Android x86: OpndDesc.size + } + else { + // mod=0 && rm == 5 => only disp32 + } + } + + //update disp and pbuf + if (modrm.mod == 2) { + // have disp32 + disp = *(int*)*pbuf; + *pbuf += 4; + } + else if (modrm.mod == 1) { + // have disp8 + disp = *(char*)*pbuf; + *pbuf += 1; + } + else { + assert(modrm.mod == 0); + if (modrm.rm == 5) { + // have disp32 w/o sib + disp = *(int*)*pbuf; + *pbuf += 4; + } + else if (modrm.rm == 4 && sib.base == 5) { + // have disp32 with SI in sib + disp = *(int*)*pbuf; + *pbuf += 4; + } + } + opnd = EncoderBase::Operand(opndDesc.size, base, index, scale, disp); + return true; +} diff --git a/libpixelflinger/codeflinger/x86/libenc/dec_base.h b/libpixelflinger/codeflinger/x86/libenc/dec_base.h new file mode 100644 index 0000000..f1fa123 --- /dev/null +++ b/libpixelflinger/codeflinger/x86/libenc/dec_base.h @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @author Alexander V. Astapchuk + */ + +/** + * @file + * @brief Main decoding (disassembling) routines and structures. + * + * @note Quick and rough implementation, subject for a change. + */ + +#ifndef __DEC_BASE_H_INCLUDED__ +#define __DEC_BASE_H_INCLUDED__ + + +#include "enc_base.h" +#include "enc_prvt.h" + +#ifdef ENCODER_ISOLATE +using namespace enc_ia32; +#endif + +#define IF_CONDITIONAL (0x00000000) +#define IF_SYMMETRIC (0x00000000) +#define IF_BRANCH (0x00000000) + +struct Inst { + Inst() { + mn = Mnemonic_Null; + prefc = 0; + size = 0; + flags = 0; + //offset = 0; + //direct_addr = NULL; + argc = 0; + for(int i = 0; i < 4; ++i) + { + pref[i] = InstPrefix_Null; + } + } + /** + * Mnemonic of the instruction.s + */ + Mnemonic mn; + /** + * Enumerating of indexes in the pref array. + */ + enum PrefGroups + { + Group1 = 0, + Group2, + Group3, + Group4 + }; + /** + * Number of prefixes (1 byte each). + */ + unsigned int prefc; + /** + * Instruction prefixes. Prefix should be placed here according to its group. + */ + InstPrefix pref[4]; + /** + * Size, in bytes, of the instruction. + */ + unsigned size; + /** + * Flags of the instruction. + * @see MF_ + */ + unsigned flags; + /** + * An offset of target address, in case of 'CALL offset', + * 'JMP/Jcc offset'. 
+ */ + //int offset; + /** + * Direct address of the target (on Intel64/IA-32 is 'instruction IP' + + * 'instruction length' + offset). + */ + //void * direct_addr; + /** + * Number of arguments of the instruction. + */ + unsigned argc; + // + EncoderBase::Operand operands[3]; + // + const EncoderBase::OpcodeDesc * odesc; +}; + +inline bool is_jcc(Mnemonic mn) +{ + return Mnemonic_JO <= mn && mn<=Mnemonic_JG; +} + +class DecoderBase { +public: + static unsigned decode(const void * addr, Inst * pinst); +private: + static bool decodeModRM(const EncoderBase::OpcodeDesc& odesc, + const unsigned char ** pbuf, Inst * pinst +#ifdef _EM64T_ + , const Rex *rex +#endif + ); + static bool decode_aux(const EncoderBase::OpcodeDesc& odesc, + unsigned aux, const unsigned char ** pbuf, + Inst * pinst +#ifdef _EM64T_ + , const Rex *rex +#endif + ); + static bool try_mn(Mnemonic mn, const unsigned char ** pbuf, Inst * pinst); + static unsigned int fill_prefs( const unsigned char * bytes, Inst * pinst); + static bool is_prefix(const unsigned char * bytes); +}; + +#endif // ~ __DEC_BASE_H_INCLUDED__ diff --git a/libpixelflinger/codeflinger/x86/libenc/enc_base.cpp b/libpixelflinger/codeflinger/x86/libenc/enc_base.cpp new file mode 100644 index 0000000..0562ce8 --- /dev/null +++ b/libpixelflinger/codeflinger/x86/libenc/enc_base.cpp @@ -0,0 +1,1137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @author Alexander V. Astapchuk + */ +#include "enc_base.h" +//#include <climits> +#include <string.h> +#define USE_ENCODER_DEFINES +#include "enc_prvt.h" +#include <stdio.h> + +//#define JET_PROTO + +#ifdef JET_PROTO +#include "dec_base.h" +#include "jvmti_dasm.h" +#endif + +ENCODER_NAMESPACE_START + +/** + * @file + * @brief Main encoding routines and structures. + */ + +#ifndef _WIN32 + #define strcmpi strcasecmp +#endif + +int EncoderBase::dummy = EncoderBase::buildTable(); + +const unsigned char EncoderBase::size_hash[OpndSize_64+1] = { + // + 0xFF, // OpndSize_Null = 0, + 3, // OpndSize_8 = 0x1, + 2, // OpndSize_16 = 0x2, + 0xFF, // 0x3 + 1, // OpndSize_32 = 0x4, + 0xFF, // 0x5 + 0xFF, // 0x6 + 0xFF, // 0x7 + 0, // OpndSize_64 = 0x8, + // +}; + +const unsigned char EncoderBase::kind_hash[OpndKind_Mem+1] = { + // + //gp reg -> 000 = 0 + //memory -> 001 = 1 + //immediate -> 010 = 2 + //xmm reg -> 011 = 3 + //segment regs -> 100 = 4 + //fp reg -> 101 = 5 + //mmx reg -> 110 = 6 + // + 0xFF, // 0 OpndKind_Null=0, + 0<<2, // 1 OpndKind_GPReg = + // OpndKind_MinRegKind=0x1, + 4<<2, // 2 OpndKind_SReg=0x2, + +#ifdef _HAVE_MMX_ + 6<<2, // 3 +#else + 0xFF, // 3 +#endif + + 5<<2, // 4 OpndKind_FPReg=0x4, + 0xFF, 0xFF, 0xFF, // 5, 6, 7 + 3<<2, // OpndKind_XMMReg=0x8, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 9, 0xA, 0xB, 0xC, 0xD, + // 0xE, 0xF + 0xFF, // OpndKind_MaxRegKind = + // OpndKind_StatusReg = + // OpndKind_OtherReg=0x10, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x11-0x18 + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 
0x19-0x1F + 2<<2, // OpndKind_Immediate=0x20, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x21-0x28 + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x29-0x30 + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x31-0x38 + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 0x39-0x3F + 1<<2, // OpndKind_Memory=0x40 +}; + +char * EncoderBase::curRelOpnd[3]; + +char* EncoderBase::encode_aux(char* stream, unsigned aux, + const Operands& opnds, const OpcodeDesc * odesc, + unsigned * pargsCount, Rex * prex) +{ + const unsigned byte = aux; + OpcodeByteKind kind = (OpcodeByteKind)(byte & OpcodeByteKind_KindMask); + // The '>>' here is to force the switch to be table-based) instead of + // set of CMP+Jcc. + if (*pargsCount >= COUNTOF(opnds)) { + assert(false); + return stream; + } + switch(kind>>8) { + case OpcodeByteKind_SlashR>>8: + // /r - Indicates that the ModR/M byte of the instruction contains + // both a register operand and an r/m operand. + { + assert(opnds.count() > 1); + // not true anymore for MOVQ xmm<->r + //assert((odesc->opnds[0].kind & OpndKind_Mem) || + // (odesc->opnds[1].kind & OpndKind_Mem)); + unsigned memidx = odesc->opnds[0].kind & OpndKind_Mem ? 0 : 1; + unsigned regidx = memidx == 0 ? 
1 : 0; + memidx += *pargsCount; + regidx += *pargsCount; + ModRM& modrm = *(ModRM*)stream; + if (memidx >= COUNTOF(opnds) || regidx >= COUNTOF(opnds)) { + assert(false); + break; + } + if (opnds[memidx].is_mem()) { + stream = encodeModRM(stream, opnds, memidx, odesc, prex); + } + else { + modrm.mod = 3; // 11 + modrm.rm = getHWRegIndex(opnds[memidx].reg()); +#ifdef _EM64T_ + if (opnds[memidx].need_rex() && needs_rex_r(opnds[memidx].reg())) { + prex->b = 1; + } +#endif + ++stream; + } + modrm.reg = getHWRegIndex(opnds[regidx].reg()); +#ifdef _EM64T_ + if (opnds[regidx].need_rex() && needs_rex_r(opnds[regidx].reg())) { + prex->r = 1; + } +#endif + *pargsCount += 2; + } + break; + case OpcodeByteKind_SlashNum>>8: + // /digit - A digit between 0 and 7 indicates that the + // ModR/M byte of the instruction uses only the r/m + // (register or memory) operand. The reg field contains + // the digit that provides an extension to the instruction's + // opcode. + { + const unsigned lowByte = (byte & OpcodeByteKind_OpcodeMask); + assert(lowByte <= 7); + ModRM& modrm = *(ModRM*)stream; + unsigned idx = *pargsCount; + assert(opnds[idx].is_mem() || opnds[idx].is_reg()); + if (opnds[idx].is_mem()) { + stream = encodeModRM(stream, opnds, idx, odesc, prex); + } + else { + modrm.mod = 3; // 11 + modrm.rm = getHWRegIndex(opnds[idx].reg()); +#ifdef _EM64T_ + if (opnds[idx].need_rex() && needs_rex_r(opnds[idx].reg())) { + prex->b = 1; + } +#endif + ++stream; + } + modrm.reg = (char)lowByte; + *pargsCount += 1; + } + break; + case OpcodeByteKind_plus_i>>8: + // +i - A number used in floating-point instructions when one + // of the operands is ST(i) from the FPU register stack. The + // number i (which can range from 0 to 7) is added to the + // hexadecimal byte given at the left of the plus sign to form + // a single opcode byte. 
+ { + unsigned idx = *pargsCount; + const unsigned lowByte = (byte & OpcodeByteKind_OpcodeMask); + *stream = (char)lowByte + getHWRegIndex(opnds[idx].reg()); + ++stream; + *pargsCount += 1; + } + break; + case OpcodeByteKind_ib>>8: + case OpcodeByteKind_iw>>8: + case OpcodeByteKind_id>>8: +#ifdef _EM64T_ + case OpcodeByteKind_io>>8: +#endif //_EM64T_ + // ib, iw, id - A 1-byte (ib), 2-byte (iw), or 4-byte (id) + // immediate operand to the instruction that follows the + // opcode, ModR/M bytes or scale-indexing bytes. The opcode + // determines if the operand is a signed value. All words + // and double words are given with the low-order byte first. + { + unsigned idx = *pargsCount; + *pargsCount += 1; + assert(opnds[idx].is_imm()); + if (kind == OpcodeByteKind_ib) { + *(unsigned char*)stream = (unsigned char)opnds[idx].imm(); + curRelOpnd[idx] = stream; + stream += 1; + } + else if (kind == OpcodeByteKind_iw) { + *(unsigned short*)stream = (unsigned short)opnds[idx].imm(); + curRelOpnd[idx] = stream; + stream += 2; + } + else if (kind == OpcodeByteKind_id) { + *(unsigned*)stream = (unsigned)opnds[idx].imm(); + curRelOpnd[idx] = stream; + stream += 4; + } +#ifdef _EM64T_ + else { + assert(kind == OpcodeByteKind_io); + *(long long*)stream = (long long)opnds[idx].imm(); + curRelOpnd[idx] = stream; + stream += 8; + } +#else + else { + assert(false); + } +#endif + } + break; + case OpcodeByteKind_cb>>8: + assert(opnds[*pargsCount].is_imm()); + *(unsigned char*)stream = (unsigned char)opnds[*pargsCount].imm(); + curRelOpnd[*pargsCount]= stream; + stream += 1; + *pargsCount += 1; + break; + case OpcodeByteKind_cw>>8: + assert(opnds[*pargsCount].is_imm()); + *(unsigned short*)stream = (unsigned short)opnds[*pargsCount].imm(); + curRelOpnd[*pargsCount]= stream; + stream += 2; + *pargsCount += 1; + break; + case OpcodeByteKind_cd>>8: + assert(opnds[*pargsCount].is_imm()); + *(unsigned*)stream = (unsigned)opnds[*pargsCount].imm(); + curRelOpnd[*pargsCount]= stream; + stream 
+= 4; + *pargsCount += 1; + break; + //OpcodeByteKind_cp = 0x0B00, + //OpcodeByteKind_co = 0x0C00, + //OpcodeByteKind_ct = 0x0D00, + case OpcodeByteKind_rb>>8: + case OpcodeByteKind_rw>>8: + case OpcodeByteKind_rd>>8: + // +rb, +rw, +rd - A register code, from 0 through 7, + // added to the hexadecimal byte given at the left of + // the plus sign to form a single opcode byte. + assert(opnds.count() > 0); + assert(opnds[*pargsCount].is_reg()); + { + const unsigned lowByte = (byte & OpcodeByteKind_OpcodeMask); + *(unsigned char*)stream = (unsigned char)lowByte + + getHWRegIndex(opnds[*pargsCount].reg()); +#ifdef _EM64T_ + if (opnds[*pargsCount].need_rex() && needs_rex_r(opnds[*pargsCount].reg())) { + prex->b = 1; + } +#endif + ++stream; + *pargsCount += 1; + } + break; + default: + assert(false); + break; + } + return stream; +} + +char * EncoderBase::encode(char * stream, Mnemonic mn, const Operands& opnds) +{ +#ifdef _DEBUG + if (opnds.count() > 0) { + if (opnds[0].is_mem()) { + assert(getRegKind(opnds[0].base()) != OpndKind_SReg); + } + else if (opnds.count() >1 && opnds[1].is_mem()) { + assert(getRegKind(opnds[1].base()) != OpndKind_SReg); + } + } +#endif + +#ifdef JET_PROTO + char* saveStream = stream; +#endif + + const OpcodeDesc * odesc = lookup(mn, opnds); +#if !defined(_EM64T_) + bool copy_opcode = true; + Rex *prex = NULL; +#else + // We need rex if + // either of registers used as operand or address form is new extended register + // it's explicitly specified by opcode + // So, if we don't have REX in opcode but need_rex, then set rex here + // otherwise, wait until opcode is set, and then update REX + + bool copy_opcode = true; + unsigned char _1st = odesc->opcode[0]; + + Rex *prex = (Rex*)stream; + if (opnds.need_rex() && + ((_1st == 0x66) || (_1st == 0xF2 || _1st == 0xF3) && odesc->opcode[1] == 0x0F)) { + // Special processing + // + copy_opcode = false; + // + *(unsigned char*)stream = _1st; + ++stream; + // + prex = (Rex*)stream; + prex->dummy = 4; + 
prex->w = 0; + prex->b = 0; + prex->x = 0; + prex->r = 0; + ++stream; + // + memcpy(stream, &odesc->opcode[1], odesc->opcode_len-1); + stream += odesc->opcode_len-1; + } + else if (_1st != 0x48 && opnds.need_rex()) { + prex = (Rex*)stream; + prex->dummy = 4; + prex->w = 0; + prex->b = 0; + prex->x = 0; + prex->r = 0; + ++stream; + } +#endif // ifndef EM64T + + if (copy_opcode) { + if (odesc->opcode_len==1) { + unsigned char *dest = (unsigned char *) (stream); + unsigned char *src = (unsigned char *) (& (odesc->opcode)); + *dest = *src; + } + else if (odesc->opcode_len==2) { + short *dest = (short *) (stream); + void *ptr = (void *) (& (odesc->opcode)); + short *src = (short *) (ptr); + *dest = *src; + } + else if (odesc->opcode_len==3) { + unsigned short *dest = (unsigned short *) (stream); + void *ptr = (void *) (& (odesc->opcode)); + unsigned short *src = (unsigned short *) (ptr); + *dest = *src; + + //Now handle the last part + unsigned char *dest2 = (unsigned char *) (stream + 2); + *dest2 = odesc->opcode[2]; + } + else if (odesc->opcode_len==4) { + unsigned int *dest = (unsigned int *) (stream); + void *ptr = (void *) (& (odesc->opcode)); + unsigned int *src = (unsigned int *) (ptr); + *dest = *src; + } + stream += odesc->opcode_len; + } + + unsigned argsCount = odesc->first_opnd; + + if (odesc->aux0) { + stream = encode_aux(stream, odesc->aux0, opnds, odesc, &argsCount, prex); + if (odesc->aux1) { + stream = encode_aux(stream, odesc->aux1, opnds, odesc, &argsCount, prex); + } + } +#ifdef JET_PROTO + //saveStream + Inst inst; + unsigned len = DecoderBase::decode(saveStream, &inst); + assert(inst.mn == mn); + assert(len == (unsigned)(stream-saveStream)); + if (mn == Mnemonic_CALL || mn == Mnemonic_JMP || + Mnemonic_RET == mn || + (Mnemonic_JO<=mn && mn<=Mnemonic_JG)) { + assert(inst.argc == opnds.count()); + + InstructionDisassembler idi(saveStream); + + for (unsigned i=0; i<inst.argc; i++) { + const EncoderBase::Operand& original = opnds[i]; + const 
EncoderBase::Operand& decoded = inst.operands[i]; + assert(original.kind() == decoded.kind()); + assert(original.size() == decoded.size()); + if (original.is_imm()) { + assert(original.imm() == decoded.imm()); + assert(idi.get_opnd(0).kind == InstructionDisassembler::Kind_Imm); + if (mn == Mnemonic_CALL) { + assert(idi.get_type() == InstructionDisassembler::RELATIVE_CALL); + } + else if (mn == Mnemonic_JMP) { + assert(idi.get_type() == InstructionDisassembler::RELATIVE_JUMP); + } + else if (mn == Mnemonic_RET) { + assert(idi.get_type() == InstructionDisassembler::RET); + } + else { + assert(idi.get_type() == InstructionDisassembler::RELATIVE_COND_JUMP); + } + } + else if (original.is_mem()) { + assert(original.base() == decoded.base()); + assert(original.index() == decoded.index()); + assert(original.scale() == decoded.scale()); + assert(original.disp() == decoded.disp()); + assert(idi.get_opnd(0).kind == InstructionDisassembler::Kind_Mem); + if (mn == Mnemonic_CALL) { + assert(idi.get_type() == InstructionDisassembler::INDIRECT_CALL); + } + else if (mn == Mnemonic_JMP) { + assert(idi.get_type() == InstructionDisassembler::INDIRECT_JUMP); + } + else { + assert(false); + } + } + else { + assert(original.is_reg()); + assert(original.reg() == decoded.reg()); + assert(idi.get_opnd(0).kind == InstructionDisassembler::Kind_Reg); + if (mn == Mnemonic_CALL) { + assert(idi.get_type() == InstructionDisassembler::INDIRECT_CALL); + } + else if (mn == Mnemonic_JMP) { + assert(idi.get_type() == InstructionDisassembler::INDIRECT_JUMP); + } + else { + assert(false); + } + } + } + + Inst inst2; + len = DecoderBase::decode(saveStream, &inst2); + } + + // if(idi.get_length_with_prefix() != (int)len) { + //__asm { int 3 }; + // } +#endif + + return stream; +} + +char* EncoderBase::encodeModRM(char* stream, const Operands& opnds, + unsigned idx, const OpcodeDesc * odesc, + Rex * prex) +{ + const Operand& op = opnds[idx]; + assert(op.is_mem()); + assert(idx < COUNTOF(curRelOpnd)); + 
ModRM& modrm = *(ModRM*)stream; + ++stream; + SIB& sib = *(SIB*)stream; + + // we need SIB if + // we have index & scale (nb: having index w/o base and w/o scale + // treated as error) + // the base is EBP w/o disp, BUT let's use a fake disp8 + // the base is ESP (nb: cant have ESP as index) + + RegName base = op.base(); + // only disp ?.. + if (base == RegName_Null && op.index() == RegName_Null) { + assert(op.scale() == 0); // 'scale!=0' has no meaning without index + // ... yes - only have disp + // On EM64T, the simply [disp] addressing means 'RIP-based' one - + // must have to use SIB to encode 'DS: based' +#ifdef _EM64T_ + modrm.mod = 0; // 00 - .. + modrm.rm = 4; // 100 - have SIB + + sib.base = 5; // 101 - none + sib.index = 4; // 100 - none + sib.scale = 0; // + ++stream; // bypass SIB +#else + // ignore disp_fits8, always use disp32. + modrm.mod = 0; + modrm.rm = 5; +#endif + *(unsigned*)stream = (unsigned)op.disp(); + curRelOpnd[idx]= stream; + stream += 4; + return stream; + } + + //climits: error when targeting compal +#define CHAR_MIN -127 +#define CHAR_MAX 127 + const bool disp_fits8 = CHAR_MIN <= op.disp() && op.disp() <= CHAR_MAX; + /*&& op.base() != RegName_Null - just checked above*/ + if (op.index() == RegName_Null && getHWRegIndex(op.base()) != getHWRegIndex(REG_STACK)) { + assert(op.scale() == 0); // 'scale!=0' has no meaning without index + // ... luckily no SIB, only base and may be a disp + + // EBP base is a special case. 
Need to use [EBP] + disp8 form + if (op.disp() == 0 && getHWRegIndex(op.base()) != getHWRegIndex(RegName_EBP)) { + modrm.mod = 0; // mod=00, no disp et all + } + else if (disp_fits8) { + modrm.mod = 1; // mod=01, use disp8 + *(unsigned char*)stream = (unsigned char)op.disp(); + curRelOpnd[idx]= stream; + ++stream; + } + else { + modrm.mod = 2; // mod=10, use disp32 + *(unsigned*)stream = (unsigned)op.disp(); + curRelOpnd[idx]= stream; + stream += 4; + } + modrm.rm = getHWRegIndex(op.base()); + if (is_em64t_extra_reg(op.base())) { + prex->b = 1; + } + return stream; + } + + // cool, we do have SIB. + ++stream; // bypass SIB in stream + + // {E|R}SP cannot be scaled index, however, R12 which has the same index in modrm - can + assert(op.index() == RegName_Null || !equals(op.index(), REG_STACK)); + + // Only GPRegs can be encoded in the SIB + assert(op.base() == RegName_Null || + getRegKind(op.base()) == OpndKind_GPReg); + assert(op.index() == RegName_Null || + getRegKind(op.index()) == OpndKind_GPReg); + + modrm.rm = 4; // r/m = 100, means 'we have SIB here' + if (op.base() == RegName_Null) { + // no base. 
+ // already checked above if + // the first if() //assert(op.index() != RegName_Null); + + modrm.mod = 0; // mod=00 - here it means 'no base, but disp32' + sib.base = 5; // 101 with mod=00 ^^^ + + // encode at least fake disp32 to avoid having [base=ebp] + *(unsigned*)stream = op.disp(); + curRelOpnd[idx]= stream; + stream += 4; + + unsigned sc = op.scale(); + if (sc == 1 || sc==0) { sib.scale = 0; } // SS=00 + else if (sc == 2) { sib.scale = 1; } // SS=01 + else if (sc == 4) { sib.scale = 2; } // SS=10 + else if (sc == 8) { sib.scale = 3; } // SS=11 + sib.index = getHWRegIndex(op.index()); + if (is_em64t_extra_reg(op.index())) { + prex->x = 1; + } + + return stream; + } + + if (op.disp() == 0 && getHWRegIndex(op.base()) != getHWRegIndex(RegName_EBP)) { + modrm.mod = 0; // mod=00, no disp + } + else if (disp_fits8) { + modrm.mod = 1; // mod=01, use disp8 + *(unsigned char*)stream = (unsigned char)op.disp(); + curRelOpnd[idx]= stream; + stream += 1; + } + else { + modrm.mod = 2; // mod=10, use disp32 + *(unsigned*)stream = (unsigned)op.disp(); + curRelOpnd[idx]= stream; + stream += 4; + } + + if (op.index() == RegName_Null) { + assert(op.scale() == 0); // 'scale!=0' has no meaning without index + // the only reason we're here without index, is that we have {E|R}SP + // or R12 as a base. 
Another possible reason - EBP without a disp - + // is handled above by adding a fake disp8 +#ifdef _EM64T_ + assert(op.base() != RegName_Null && (equals(op.base(), REG_STACK) || + equals(op.base(), RegName_R12))); +#else // _EM64T_ + assert(op.base() != RegName_Null && equals(op.base(), REG_STACK)); +#endif //_EM64T_ + sib.scale = 0; // SS = 00 + sib.index = 4; // SS + index=100 means 'no index' + } + else { + unsigned sc = op.scale(); + if (sc == 1 || sc==0) { sib.scale = 0; } // SS=00 + else if (sc == 2) { sib.scale = 1; } // SS=01 + else if (sc == 4) { sib.scale = 2; } // SS=10 + else if (sc == 8) { sib.scale = 3; } // SS=11 + sib.index = getHWRegIndex(op.index()); + if (is_em64t_extra_reg(op.index())) { + prex->x = 1; + } + // not an error by itself, but the usage of [index*1] instead + // of [base] is discouraged + assert(op.base() != RegName_Null || op.scale() != 1); + } + sib.base = getHWRegIndex(op.base()); + if (is_em64t_extra_reg(op.base())) { + prex->b = 1; + } + return stream; +} + +char * EncoderBase::nops(char * stream, unsigned howMany) +{ + // Recommended multi-byte NOPs from the Intel architecture manual + static const unsigned char nops[10][9] = { + { 0, }, // 0, this line is dummy and not used in the loop below + { 0x90, }, // 1-byte NOP + { 0x66, 0x90, }, // 2 + { 0x0F, 0x1F, 0x00, }, // 3 + { 0x0F, 0x1F, 0x40, 0x00, }, // 4 + { 0x0F, 0x1F, 0x44, 0x00, 0x00, }, // 5 + { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00, }, // 6 + { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00, }, // 7 + { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, }, // 8 + { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 }, // 9-byte NOP + }; + + // Start from delivering the longest possible NOPs, then proceed with shorter ones + for (unsigned nopSize=9; nopSize!=0; nopSize--) { + while(howMany>=nopSize) { + const unsigned char* nopBytes = nops[nopSize]; + for (unsigned i=0; i<nopSize; i++) { + stream[i] = nopBytes[i]; + } + stream += nopSize; + howMany -= nopSize; + } + } + char* end 
= stream + howMany; + return end; +} + +char * EncoderBase::prefix(char* stream, InstPrefix pref) +{ + if (pref== InstPrefix_Null) { + // nothing to do + return stream; + } + *stream = (char)pref; + return stream + 1; +} + + +/** + * + */ +bool EncoderBase::extAllowed(OpndExt opndExt, OpndExt instExt) { + if (instExt == opndExt || instExt == OpndExt_Any || opndExt == OpndExt_Any) { + return true; + } +//asm("int3"); +assert(0); + return false; +} + +static bool try_match(const EncoderBase::OpcodeDesc& odesc, + const EncoderBase::Operands& opnds, bool strict) { + + assert(odesc.roles.count == opnds.count()); + + for(unsigned j=0; j<odesc.roles.count; j++) { + // - the location must match exactly + if ((odesc.opnds[j].kind & opnds[j].kind()) != opnds[j].kind()) { + return false; + } + if (strict) { + // the size must match exactly + if (odesc.opnds[j].size != opnds[j].size()) { + return false; + } + } + else { + // must match only for def operands, and dont care about use ones + // situations like 'mov r8, imm32/mov r32, imm8' so the + // destination operand defines the overall size + if (EncoderBase::getOpndRoles(odesc.roles, j) & OpndRole_Def) { + if (odesc.opnds[j].size != opnds[j].size()) { + return false; + } + } + } + } + return true; +} + +// +//Subhash implementaion - may be useful in case of many misses during fast +//opcode lookup. 
+// + +#ifdef ENCODER_USE_SUBHASH +static unsigned subHash[32]; + +static unsigned find(Mnemonic mn, unsigned hash) +{ + unsigned key = hash % COUNTOF(subHash); + unsigned pack = subHash[key]; + unsigned _hash = pack & 0xFFFF; + if (_hash != hash) { + stat.miss(mn); + return EncoderBase::NOHASH; + } + unsigned _mn = (pack >> 24)&0xFF; + if (_mn != _mn) { + stat.miss(mn); + return EncoderBase::NOHASH; + } + unsigned idx = (pack >> 16) & 0xFF; + stat.hit(mn); + return idx; +} + +static void put(Mnemonic mn, unsigned hash, unsigned idx) +{ + unsigned pack = hash | (idx<<16) | (mn << 24); + unsigned key = hash % COUNTOF(subHash); + subHash[key] = pack; +} +#endif + +const EncoderBase::OpcodeDesc * +EncoderBase::lookup(Mnemonic mn, const Operands& opnds) +{ + const unsigned hash = opnds.hash(); + unsigned opcodeIndex = opcodesHashMap[mn][hash]; +#ifdef ENCODER_USE_SUBHASH + if (opcodeIndex == NOHASH) { + opcodeIndex = find(mn, hash); + } +#endif + + if (opcodeIndex == NOHASH) { + // fast-path did no work. 
try to lookup sequentially + const OpcodeDesc * odesc = opcodes[mn]; + int idx = -1; + bool found = false; + for (idx=0; !odesc[idx].last; idx++) { + const OpcodeDesc& opcode = odesc[idx]; + if (opcode.platf == OpcodeInfo::decoder) { + continue; + } + if (opcode.roles.count != opnds.count()) { + continue; + } + if (try_match(opcode, opnds, true)) { + found = true; + break; + } + } + if (!found) { + for (idx=0; !odesc[idx].last; idx++) { + const OpcodeDesc& opcode = odesc[idx]; + if (opcode.platf == OpcodeInfo::decoder) { + continue; + } + if (opcode.roles.count != opnds.count()) { + continue; + } + if (try_match(opcode, opnds, false)) { + found = true; + break; + } + } + } + assert(found); + opcodeIndex = idx; +#ifdef ENCODER_USE_SUBHASH + put(mn, hash, opcodeIndex); +#endif + } + assert(opcodeIndex != NOHASH); + const OpcodeDesc * odesc = &opcodes[mn][opcodeIndex]; + assert(!odesc->last); + assert(odesc->roles.count == opnds.count()); + assert(odesc->platf != OpcodeInfo::decoder); +#if !defined(_EM64T_) + // tuning was done for IA32 only, so no size restriction on EM64T + //assert(sizeof(OpcodeDesc)==128); +#endif + return odesc; +} + +char* EncoderBase::getOpndLocation(int index) { + assert(index < 3); + return curRelOpnd[index]; +} + + +Mnemonic EncoderBase::str2mnemonic(const char * mn_name) +{ + for (unsigned m = 1; m<Mnemonic_Count; m++) { + if (!strcmpi(mnemonics[m].name, mn_name)) { + return (Mnemonic)m; + } + } + return Mnemonic_Null; +} + +static const char * conditionStrings[ConditionMnemonic_Count] = { + "O", + "NO", + "B", + "AE", + "Z", + "NZ", + "BE", + "A", + + "S", + "NS", + "P", + "NP", + "L", + "GE", + "LE", + "G", +}; + +const char * getConditionString(ConditionMnemonic cm) { + return conditionStrings[cm]; +} + +static const struct { + char sizeString[12]; + OpndSize size; +} +sizes[] = { + { "Sz8", OpndSize_8 }, + { "Sz16", OpndSize_16 }, + { "Sz32", OpndSize_32 }, + { "Sz64", OpndSize_64 }, +#if !defined(TESTING_ENCODER) + { "Sz80", 
OpndSize_80 }, + { "Sz128", OpndSize_128 }, +#endif + { "SzAny", OpndSize_Any }, +}; + + +OpndSize getOpndSize(const char * sizeString) +{ + assert(sizeString); + for (unsigned i = 0; i<COUNTOF(sizes); i++) { + if (!strcmpi(sizeString, sizes[i].sizeString)) { + return sizes[i].size; + } + } + return OpndSize_Null; +} + +const char * getOpndSizeString(OpndSize size) { + for( unsigned i = 0; i<COUNTOF(sizes); i++ ) { + if( sizes[i].size==size ) { + return sizes[i].sizeString; + } + } + return NULL; +} + +static const struct { + char kindString[16]; + OpndKind kind; +} +kinds[] = { + { "Null", OpndKind_Null }, + { "GPReg", OpndKind_GPReg }, + { "SReg", OpndKind_SReg }, + { "FPReg", OpndKind_FPReg }, + { "XMMReg", OpndKind_XMMReg }, +#ifdef _HAVE_MMX_ + { "MMXReg", OpndKind_MMXReg }, +#endif + { "StatusReg", OpndKind_StatusReg }, + { "Reg", OpndKind_Reg }, + { "Imm", OpndKind_Imm }, + { "Mem", OpndKind_Mem }, + { "Any", OpndKind_Any }, +}; + +const char * getOpndKindString(OpndKind kind) +{ + for (unsigned i = 0; i<COUNTOF(kinds); i++) { + if (kinds[i].kind==kind) { + return kinds[i].kindString; + } + } + return NULL; +} + +OpndKind getOpndKind(const char * kindString) +{ + assert(kindString); + for (unsigned i = 0; i<COUNTOF(kinds); i++) { + if (!strcmpi(kindString, kinds[i].kindString)) { + return kinds[i].kind; + } + } + return OpndKind_Null; +} + +/** + * A mapping between register string representation and its RegName constant. 
+ */ +static const struct { + char regstring[7]; + RegName regname; +} + +registers[] = { +#ifdef _EM64T_ + {"RAX", RegName_RAX}, + {"RBX", RegName_RBX}, + {"RCX", RegName_RCX}, + {"RDX", RegName_RDX}, + {"RBP", RegName_RBP}, + {"RSI", RegName_RSI}, + {"RDI", RegName_RDI}, + {"RSP", RegName_RSP}, + {"R8", RegName_R8}, + {"R9", RegName_R9}, + {"R10", RegName_R10}, + {"R11", RegName_R11}, + {"R12", RegName_R12}, + {"R13", RegName_R13}, + {"R14", RegName_R14}, + {"R15", RegName_R15}, +#endif + + {"EAX", RegName_EAX}, + {"ECX", RegName_ECX}, + {"EDX", RegName_EDX}, + {"EBX", RegName_EBX}, + {"ESP", RegName_ESP}, + {"EBP", RegName_EBP}, + {"ESI", RegName_ESI}, + {"EDI", RegName_EDI}, +#ifdef _EM64T_ + {"R8D", RegName_R8D}, + {"R9D", RegName_R9D}, + {"R10D", RegName_R10D}, + {"R11D", RegName_R11D}, + {"R12D", RegName_R12D}, + {"R13D", RegName_R13D}, + {"R14D", RegName_R14D}, + {"R15D", RegName_R15D}, +#endif + + {"AX", RegName_AX}, + {"CX", RegName_CX}, + {"DX", RegName_DX}, + {"BX", RegName_BX}, + {"SP", RegName_SP}, + {"BP", RegName_BP}, + {"SI", RegName_SI}, + {"DI", RegName_DI}, + + {"AL", RegName_AL}, + {"CL", RegName_CL}, + {"DL", RegName_DL}, + {"BL", RegName_BL}, +#if !defined(_EM64T_) + {"AH", RegName_AH}, + {"CH", RegName_CH}, + {"DH", RegName_DH}, + {"BH", RegName_BH}, +#else + {"SPL", RegName_SPL}, + {"BPL", RegName_BPL}, + {"SIL", RegName_SIL}, + {"DIL", RegName_DIL}, + {"R8L", RegName_R8L}, + {"R9L", RegName_R9L}, + {"R10L", RegName_R10L}, + {"R11L", RegName_R11L}, + {"R12L", RegName_R12L}, + {"R13L", RegName_R13L}, + {"R14L", RegName_R14L}, + {"R15L", RegName_R15L}, +#endif + {"ES", RegName_ES}, + {"CS", RegName_CS}, + {"SS", RegName_SS}, + {"DS", RegName_DS}, + {"FS", RegName_FS}, + {"GS", RegName_GS}, + + {"FP0", RegName_FP0}, +/* + {"FP1", RegName_FP1}, + {"FP2", RegName_FP2}, + {"FP3", RegName_FP3}, + {"FP4", RegName_FP4}, + {"FP5", RegName_FP5}, + {"FP6", RegName_FP6}, + {"FP7", RegName_FP7}, +*/ + {"FP0S", RegName_FP0S}, + {"FP1S", RegName_FP1S}, + 
{"FP2S", RegName_FP2S}, + {"FP3S", RegName_FP3S}, + {"FP4S", RegName_FP4S}, + {"FP5S", RegName_FP5S}, + {"FP6S", RegName_FP6S}, + {"FP7S", RegName_FP7S}, + + {"FP0D", RegName_FP0D}, + {"FP1D", RegName_FP1D}, + {"FP2D", RegName_FP2D}, + {"FP3D", RegName_FP3D}, + {"FP4D", RegName_FP4D}, + {"FP5D", RegName_FP5D}, + {"FP6D", RegName_FP6D}, + {"FP7D", RegName_FP7D}, + + {"XMM0", RegName_XMM0}, + {"XMM1", RegName_XMM1}, + {"XMM2", RegName_XMM2}, + {"XMM3", RegName_XMM3}, + {"XMM4", RegName_XMM4}, + {"XMM5", RegName_XMM5}, + {"XMM6", RegName_XMM6}, + {"XMM7", RegName_XMM7}, +#ifdef _EM64T_ + {"XMM8", RegName_XMM8}, + {"XMM9", RegName_XMM9}, + {"XMM10", RegName_XMM10}, + {"XMM11", RegName_XMM11}, + {"XMM12", RegName_XMM12}, + {"XMM13", RegName_XMM13}, + {"XMM14", RegName_XMM14}, + {"XMM15", RegName_XMM15}, +#endif + + + {"XMM0S", RegName_XMM0S}, + {"XMM1S", RegName_XMM1S}, + {"XMM2S", RegName_XMM2S}, + {"XMM3S", RegName_XMM3S}, + {"XMM4S", RegName_XMM4S}, + {"XMM5S", RegName_XMM5S}, + {"XMM6S", RegName_XMM6S}, + {"XMM7S", RegName_XMM7S}, +#ifdef _EM64T_ + {"XMM8S", RegName_XMM8S}, + {"XMM9S", RegName_XMM9S}, + {"XMM10S", RegName_XMM10S}, + {"XMM11S", RegName_XMM11S}, + {"XMM12S", RegName_XMM12S}, + {"XMM13S", RegName_XMM13S}, + {"XMM14S", RegName_XMM14S}, + {"XMM15S", RegName_XMM15S}, +#endif + + {"XMM0D", RegName_XMM0D}, + {"XMM1D", RegName_XMM1D}, + {"XMM2D", RegName_XMM2D}, + {"XMM3D", RegName_XMM3D}, + {"XMM4D", RegName_XMM4D}, + {"XMM5D", RegName_XMM5D}, + {"XMM6D", RegName_XMM6D}, + {"XMM7D", RegName_XMM7D}, +#ifdef _EM64T_ + {"XMM8D", RegName_XMM8D}, + {"XMM9D", RegName_XMM9D}, + {"XMM10D", RegName_XMM10D}, + {"XMM11D", RegName_XMM11D}, + {"XMM12D", RegName_XMM12D}, + {"XMM13D", RegName_XMM13D}, + {"XMM14D", RegName_XMM14D}, + {"XMM15D", RegName_XMM15D}, +#endif + + {"EFLGS", RegName_EFLAGS}, +}; + + +const char * getRegNameString(RegName reg) +{ + for (unsigned i = 0; i<COUNTOF(registers); i++) { + if (registers[i].regname == reg) { + return registers[i].regstring; 
+ } + } + return "(null)"; +} + +RegName getRegName(const char * regname) +{ + if (NULL == regname) { + return RegName_Null; + } + + for (unsigned i = 0; i<COUNTOF(registers); i++) { + if (!strcmpi(regname,registers[i].regstring)) { + return registers[i].regname; + } + } + return RegName_Null; +} + +ENCODER_NAMESPACE_END diff --git a/libpixelflinger/codeflinger/x86/libenc/enc_base.h b/libpixelflinger/codeflinger/x86/libenc/enc_base.h new file mode 100644 index 0000000..fa1062d --- /dev/null +++ b/libpixelflinger/codeflinger/x86/libenc/enc_base.h @@ -0,0 +1,748 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @author Alexander V. Astapchuk + */ + +/** + * @file + * @brief Main encoding routines and structures. + */ + +#ifndef __ENC_BASE_H_INCLUDED__ +#define __ENC_BASE_H_INCLUDED__ + +#include "enc_defs.h" + + +#include <stdlib.h> +#include <assert.h> +#include <memory.h> + +ENCODER_NAMESPACE_START +struct MnemonicInfo; +struct OpcodeInfo; +struct Rex; + +/** + * @brief Basic facilities for generation of processor's instructions. + * + * The class EncoderBase represents the basic facilities for the encoding of + * processor's instructions on IA32 and EM64T platforms. 
+ * + * The class provides general interface to generate the instructions as well + * as to retrieve some static data about instructions (number of arguments, + * their roles, etc). + * + * Currently, the EncoderBase class is used for both LIL and Jitrino code + * generators. Each of these code generators has its own wrapper to adapt + * this general interface for specific needs - see encoder.h for LIL wrappers + * and Ia32Encoder.h for Jitrino's adapter. + * + * Interface is provided through static methods, no instances of EncoderBase + * to be created. + * + * @todo RIP-based addressing on EM64T - it's not yet supported currently. + */ +class EncoderBase { +public: + class Operands; + struct MnemonicDesc; + /** + * @brief Generates processor's instruction. + * + * @param stream - a buffer to generate into + * @param mn - \link Mnemonic mnemonic \endlink of the instruction + * @param opnds - operands for the instruction + * @returns (stream + length of the just generated instruction) + */ + static char * encode(char * stream, Mnemonic mn, const Operands& opnds); + static char * getOpndLocation(int index); + + /** + * @brief Generates the smallest possible number of NOP-s. + * + * Effectively generates the smallest possible number of instructions, + * which are NOP-s for CPU. Normally used to make a code alignment. + * + * The method inserts exactly number of bytes specified. It's a caller's + * responsibility to make sure the buffer is big enough. + * + * @param stream - buffer where to generate code into, can not be NULL + * @param howMany - how many bytes to fill with NOP-s + * @return \c (stream+howMany) + */ + static char * nops(char * stream, unsigned howMany); + + /** + * @brief Inserts a prefix into the code buffer. + * + * The method writes no more than one byte into the buffer. This is a + * caller's responsibility to make sure the buffer is big enough. + * + * @param stream - buffer where to insert the prefix + * @param pref - prefix to be inserted. 
If it's InstPrefix_Null, then + * no action performed and return value is \c stream. + * @return \c (stream+1) if pref is not InstPrefix_Null, or \c stream + * otherwise + */ + static char * prefix(char* stream, InstPrefix pref); + + /** + * @brief Determines if operand with opndExt suites the position with instExt. + */ + static bool extAllowed(OpndExt opndExt, OpndExt instExt); + + /** + * @brief Returns MnemonicDesc by the given Mnemonic. + */ + static const MnemonicDesc * getMnemonicDesc(Mnemonic mn) + { + assert(mn < Mnemonic_Count); + return mnemonics + mn; + } + + /** + * @brief Returns a Mnemonic for the given name. + * + * The lookup is case insensitive, if no mnemonic found for the given + * string, then Mnemonic_Null returned. + */ + static Mnemonic str2mnemonic(const char * mn_name); + + /** + * @brief Returns a string representation of the given Mnemonic. + * + * If invalid mnemonic passed, then the behavior is unpredictable. + */ + static const char * getMnemonicString(Mnemonic mn) + { + return getMnemonicDesc(mn)->name; + } + + static const char * toStr(Mnemonic mn) + { + return getMnemonicDesc(mn)->name; + } + + + /** + * @brief Description of operand. + * + * Description of an operand in opcode - its kind, size or RegName if + * operand must be a particular register. + */ + struct OpndDesc { + /** + * @brief Location of the operand. + * + * May be a mask, i.e. OpndKind_Imm|OpndKind_Mem. + */ + OpndKind kind; + /** + * @brief Size of the operand. + */ + OpndSize size; + /** + * @brief Extention of the operand. + */ + OpndExt ext; + /** + * @brief Appropriate RegName if operand must reside on a particular + * register (i.e. CWD/CDQ instructions), RegName_Null + * otherwise. + */ + RegName reg; + }; + + /** + * @brief Description of operands' roles in instruction. + */ + struct OpndRolesDesc { + /** + * @brief Total number of operands in the operation. + */ + unsigned count; + /** + * @brief Number of defs in the operation. 
+ */ + unsigned defCount; + /** + * @brief Number of uses in the operation. + */ + unsigned useCount; + /** + * @brief Operand roles, bit-packed. + * + * A bit-packed info about operands' roles. Each operand's role is + * described by two bits, counted from right-to-left - the less + * significant bits (0,1) represent operand#0. + * + * The mask is build by ORing #OpndRole_Def and #OpndRole_Use + * appropriately and shifting left, i.e. operand#0's role would be + * - '(OpndRole_Def|OpndRole_Use)' + * - opnd#1's role would be 'OpndRole_Use<<2' + * - and operand#2's role would be, say, 'OpndRole_Def<<4'. + */ + unsigned roles; + }; + + /** + * @brief Extracts appropriate OpndRole for a given operand. + * + * The order of operands is left-to-right, i.e. for MOV, it + * would be 'MOV op0, op1' + */ + static OpndRole getOpndRoles(OpndRolesDesc ord, unsigned idx) + { + assert(idx < ord.count); + return (OpndRole)(ord.roles>>((ord.count-1-idx)*2) & 0x3); + } + + /** + * @brief Defines the maximum number of operands for an opcode. + * + * The 3 mostly comes from IDIV/IMUL which both may have up to + * 3 operands. + */ + static const unsigned int MAX_NUM_OPCODE_OPERANDS = 3; + + /** + * @brief Info about single opcode - its opcode bytes, operands, + * operands' roles. + */ + union OpcodeDesc { + char dummy[128]; // To make total size a power of 2 + + struct { + /** + * @brief Raw opcode bytes. + * + * 'Raw' opcode bytes which do not require any analysis and are + * independent from arguments/sizes/etc (may include opcode size + * prefix). + */ + char opcode[5]; + unsigned opcode_len; + unsigned aux0; + unsigned aux1; + /** + * @brief Info about opcode's operands. + */ + OpndDesc opnds[MAX_NUM_OPCODE_OPERANDS]; + unsigned first_opnd; + /** + * @brief Info about operands - total number, number of uses/defs, + * operands' roles. + */ + OpndRolesDesc roles; + /** + * @brief If not zero, then this is final OpcodeDesc structure in + * the list of opcodes for a given mnemonic. 
+ */ + char last; + char platf; + }; + }; +public: + /** + * @brief General info about mnemonic. + */ + struct MnemonicDesc { + /** + * @brief The mnemonic itself. + */ + Mnemonic mn; + /** + * Various characteristics of mnemonic. + * @see MF_ + */ + unsigned flags; + /** + * @brief Operation's operand's count and roles. + * + * For the operations whose opcodes may use different number of + * operands (i.e. IMUL/SHL) either most common value used, or empty + * value left. + */ + OpndRolesDesc roles; + /** + * @brief Print name of the mnemonic. + */ + const char * name; + }; + + + /** + * @brief Magic number, shows a maximum value a hash code can take. + * + * For meaning and arithmetics see enc_tabl.cpp. + * + * The value was increased from '5155' to '8192' to make it aligned + * for faster access in EncoderBase::lookup(). + * + * It was further increased to 16384 as support for 3 operand opcodes + * with XMM registers were added + */ + static const unsigned int HASH_MAX = 16384; //5155; + /** + * @brief Empty value, used in hash-to-opcode map to show an empty slot. + */ + static const unsigned char NOHASH = 0xFF; + /** + * @brief The name says it all. + */ + static const unsigned char HASH_BITS_PER_OPERAND = 5; + + /** + * @brief Contains info about a single instructions's operand - its + * location, size and a value for immediate or RegName for + * register operands. + */ + class Operand { + public: + /** + * @brief Initializes the instance with empty size and kind. + */ + Operand() : m_kind(OpndKind_Null), m_size(OpndSize_Null), m_ext(OpndExt_None), m_need_rex(false) {} + /** + * @brief Creates register operand from given RegName. + */ + Operand(RegName reg, OpndExt ext = OpndExt_None) : m_kind(getRegKind(reg)), + m_size(getRegSize(reg)), + m_ext(ext), m_reg(reg) + { + hash_it(); + } + /** + * @brief Creates register operand from given RegName and with the + * specified size and kind. 
+ * + * Used to speedup Operand creation as there is no need to extract + * size and kind from the RegName. + * The provided size and kind must match the RegName's ones though. + */ + Operand(OpndSize sz, OpndKind kind, RegName reg, OpndExt ext = OpndExt_None) : + m_kind(kind), m_size(sz), m_ext(ext), m_reg(reg) + { + assert(m_size == getRegSize(reg)); + assert(m_kind == getRegKind(reg)); + hash_it(); + } + /** + * @brief Creates immediate operand with the given size and value. + */ + Operand(OpndSize size, long long ival, OpndExt ext = OpndExt_None) : + m_kind(OpndKind_Imm), m_size(size), m_ext(ext), m_imm64(ival) + { + hash_it(); + } + /** + * @brief Creates immediate operand of OpndSize_32. + */ + Operand(int ival, OpndExt ext = OpndExt_None) : + m_kind(OpndKind_Imm), m_size(OpndSize_32), m_ext(ext), m_imm64(ival) + { + hash_it(); + } + /** + * @brief Creates immediate operand of OpndSize_16. + */ + Operand(short ival, OpndExt ext = OpndExt_None) : + m_kind(OpndKind_Imm), m_size(OpndSize_16), m_ext(ext), m_imm64(ival) + { + hash_it(); + } + + /** + * @brief Creates immediate operand of OpndSize_8. + */ + Operand(char ival, OpndExt ext = OpndExt_None) : + m_kind(OpndKind_Imm), m_size(OpndSize_8), m_ext(ext), m_imm64(ival) + { + hash_it(); + } + + /** + * @brief Creates memory operand. + */ + Operand(OpndSize size, RegName base, RegName index, unsigned scale, + int disp, OpndExt ext = OpndExt_None) : m_kind(OpndKind_Mem), m_size(size), m_ext(ext) + { + m_base = base; + m_index = index; + m_scale = scale; + m_disp = disp; + hash_it(); + } + + /** + * @brief Creates memory operand with only base and displacement. + */ + Operand(OpndSize size, RegName base, int disp, OpndExt ext = OpndExt_None) : + m_kind(OpndKind_Mem), m_size(size), m_ext(ext) + { + m_base = base; + m_index = RegName_Null; + m_scale = 0; + m_disp = disp; + hash_it(); + } + // + // general info + // + /** + * @brief Returns kind of the operand. 
+ */ + OpndKind kind(void) const { return m_kind; } + /** + * @brief Returns size of the operand. + */ + OpndSize size(void) const { return m_size; } + /** + * @brief Returns extention of the operand. + */ + OpndExt ext(void) const { return m_ext; } + /** + * @brief Returns hash of the operand. + */ + unsigned hash(void) const { return m_hash; } + // +#ifdef _EM64T_ + bool need_rex(void) const { return m_need_rex; } +#else + bool need_rex(void) const { return false; } +#endif + /** + * @brief Tests whether operand is memory operand. + */ + bool is_mem(void) const { return is_placed_in(OpndKind_Mem); } + /** + * @brief Tests whether operand is immediate operand. + */ + bool is_imm(void) const { return is_placed_in(OpndKind_Imm); } + /** + * @brief Tests whether operand is register operand. + */ + bool is_reg(void) const { return is_placed_in(OpndKind_Reg); } + /** + * @brief Tests whether operand is general-purpose register operand. + */ + bool is_gpreg(void) const { return is_placed_in(OpndKind_GPReg); } + /** + * @brief Tests whether operand is float-point pseudo-register operand. + */ + bool is_fpreg(void) const { return is_placed_in(OpndKind_FPReg); } + /** + * @brief Tests whether operand is XMM register operand. + */ + bool is_xmmreg(void) const { return is_placed_in(OpndKind_XMMReg); } +#ifdef _HAVE_MMX_ + /** + * @brief Tests whether operand is MMX register operand. + */ + bool is_mmxreg(void) const { return is_placed_in(OpndKind_MMXReg); } +#endif + /** + * @brief Tests whether operand is signed immediate operand. + */ + //bool is_signed(void) const { assert(is_imm()); return m_is_signed; } + + /** + * @brief Returns base of memory operand (RegName_Null if not memory). + */ + RegName base(void) const { return is_mem() ? m_base : RegName_Null; } + /** + * @brief Returns index of memory operand (RegName_Null if not memory). + */ + RegName index(void) const { return is_mem() ? 
m_index : RegName_Null; } + /** + * @brief Returns scale of memory operand (0 if not memory). + */ + unsigned scale(void) const { return is_mem() ? m_scale : 0; } + /** + * @brief Returns displacement of memory operand (0 if not memory). + */ + int disp(void) const { return is_mem() ? m_disp : 0; } + /** + * @brief Returns RegName of register operand (RegName_Null if not + * register). + */ + RegName reg(void) const { return is_reg() ? m_reg : RegName_Null; } + /** + * @brief Returns value of immediate operand (0 if not immediate). + */ + long long imm(void) const { return is_imm() ? m_imm64 : 0; } + private: + bool is_placed_in(OpndKind kd) const + { + return kd == OpndKind_Reg ? + m_kind == OpndKind_GPReg || +#ifdef _HAVE_MMX_ + m_kind == OpndKind_MMXReg || +#endif + m_kind == OpndKind_FPReg || + m_kind == OpndKind_XMMReg + : kd == m_kind; + } + void hash_it(void) + { + m_hash = get_size_hash(m_size) | get_kind_hash(m_kind); +#ifdef _EM64T_ + m_need_rex = false; + if (is_reg() && is_em64t_extra_reg(m_reg)) { + m_need_rex = true; + } + else if (is_mem() && (is_em64t_extra_reg(m_base) || + is_em64t_extra_reg(m_index))) { + m_need_rex = true; + } +#endif + } + // general info + OpndKind m_kind; + OpndSize m_size; + OpndExt m_ext; + // complex address form support + RegName m_base; + RegName m_index; + unsigned m_scale; + union { + int m_disp; + RegName m_reg; + long long m_imm64; + }; + unsigned m_hash; + bool m_need_rex; + friend class EncoderBase::Operands; + }; + /** + * @brief Simple container for up to 3 Operand-s. 
+ */ + class Operands { + public: + Operands(void) + { + clear(); + } + Operands(const Operand& op0) + { + clear(); + add(op0); + } + + Operands(const Operand& op0, const Operand& op1) + { + clear(); + add(op0); add(op1); + } + + Operands(const Operand& op0, const Operand& op1, const Operand& op2) + { + clear(); + add(op0); add(op1); add(op2); + } + + unsigned count(void) const { return m_count; } + unsigned hash(void) const { return m_hash; } + const Operand& operator[](unsigned idx) const + { + assert(idx<m_count); + return m_operands[idx]; + } + + void add(const Operand& op) + { + assert(m_count < COUNTOF(m_operands)); + m_hash = (m_hash<<HASH_BITS_PER_OPERAND) | op.hash(); + m_operands[m_count++] = op; + m_need_rex = m_need_rex || op.m_need_rex; + } +#ifdef _EM64T_ + bool need_rex(void) const { return m_need_rex; } +#else + bool need_rex(void) const { return false; } +#endif + void clear(void) + { + m_count = 0; m_hash = 0; m_need_rex = false; + } + private: + unsigned m_count; + Operand m_operands[COUNTOF( ((OpcodeDesc*)NULL)->opnds )]; + unsigned m_hash; + bool m_need_rex; + }; +public: +#ifdef _DEBUG + /** + * Verifies some presumptions about encoding data table. + * Called automaticaly during statics initialization. + */ + static int verify(void); +#endif + +private: + /** + * @brief Returns found OpcodeDesc by the given Mnemonic and operands. + */ + static const OpcodeDesc * lookup(Mnemonic mn, const Operands& opnds); + /** + * @brief Encodes mod/rm byte. + */ + static char* encodeModRM(char* stream, const Operands& opnds, + unsigned idx, const OpcodeDesc * odesc, Rex * prex); + /** + * @brief Encodes special things of opcode description - '/r', 'ib', etc. + */ + static char* encode_aux(char* stream, unsigned aux, + const Operands& opnds, const OpcodeDesc * odesc, + unsigned * pargsCount, Rex* prex); +#ifdef _EM64T_ + /** + * @brief Returns true if the 'reg' argument represents one of the new + * EM64T registers - R8(D)-R15(D). 
+ * + * The 64 bits versions of 'old-fashion' registers, i.e. RAX are not + * considered as 'extra'. + */ + static bool is_em64t_extra_reg(const RegName reg) + { + if (needs_rex_r(reg)) { + return true; + } + if (RegName_SPL <= reg && reg <= RegName_R15L) { + return true; + } + return false; + } + static bool needs_rex_r(const RegName reg) + { + if (RegName_R8 <= reg && reg <= RegName_R15) { + return true; + } + if (RegName_R8D <= reg && reg <= RegName_R15D) { + return true; + } + if (RegName_R8S <= reg && reg <= RegName_R15S) { + return true; + } + if (RegName_R8L <= reg && reg <= RegName_R15L) { + return true; + } + if (RegName_XMM8 <= reg && reg <= RegName_XMM15) { + return true; + } + if (RegName_XMM8D <= reg && reg <= RegName_XMM15D) { + return true; + } + if (RegName_XMM8S <= reg && reg <= RegName_XMM15S) { + return true; + } + return false; + } + /** + * @brief Returns an 'processor's index' of the register - the index + * used to encode the register in ModRM/SIB bytes. + * + * For the new EM64T registers the 'HW index' differs from the index + * encoded in RegName. For old-fashion registers it's effectively the + * same as ::getRegIndex(RegName). + */ + static unsigned char getHWRegIndex(const RegName reg) + { + if (getRegKind(reg) != OpndKind_GPReg) { + return getRegIndex(reg); + } + if (RegName_SPL <= reg && reg<=RegName_DIL) { + return getRegIndex(reg); + } + if (RegName_R8L<= reg && reg<=RegName_R15L) { + return getRegIndex(reg) - getRegIndex(RegName_R8L); + } + return is_em64t_extra_reg(reg) ? + getRegIndex(reg)-getRegIndex(RegName_R8D) : getRegIndex(reg); + } +#else + static unsigned char getHWRegIndex(const RegName reg) + { + return getRegIndex(reg); + } + static bool is_em64t_extra_reg(const RegName reg) + { + return false; + } +#endif +public: + static unsigned char get_size_hash(OpndSize size) { + return (size <= OpndSize_64) ? size_hash[size] : 0xFF; + } + static unsigned char get_kind_hash(OpndKind kind) { + return (kind <= OpndKind_Mem) ? 
kind_hash[kind] : 0xFF; + } + + /** + * @brief A table used for the fast computation of hash value. + * + * A change must be strictly balanced with hash-related functions and data + * in enc_base.h/.cpp. + */ + static const unsigned char size_hash[OpndSize_64+1]; + /** + * @brief A table used for the fast computation of hash value. + * + * A change must be strictly balanced with hash-related functions and data + * in enc_base.h/.cpp. + */ + static const unsigned char kind_hash[OpndKind_Mem+1]; + /** + * @brief Maximum number of opcodes used for a single mnemonic. + * + * No arithmetics behind the number, simply estimated. + */ + static const unsigned int MAX_OPCODES = 32; //20; + /** + * @brief Mapping between operands hash code and operands. + */ + static unsigned char opcodesHashMap[Mnemonic_Count][HASH_MAX]; + /** + * @brief Array of mnemonics. + */ + static MnemonicDesc mnemonics[Mnemonic_Count]; + /** + * @brief Array of available opcodes. + */ + static OpcodeDesc opcodes[Mnemonic_Count][MAX_OPCODES]; + + static int buildTable(void); + static void buildMnemonicDesc(const MnemonicInfo * minfo); + /** + * @brief Computes hash value for the given operands. + */ + static unsigned short getHash(const OpcodeInfo* odesc); + /** + * @brief Dummy variable, for automatic invocation of buildTable() at + * startup. + */ + static int dummy; + + static char * curRelOpnd[3]; +}; + +ENCODER_NAMESPACE_END + +#endif // ifndef __ENC_BASE_H_INCLUDED__ diff --git a/libpixelflinger/codeflinger/x86/libenc/enc_defs.h b/libpixelflinger/codeflinger/x86/libenc/enc_defs.h new file mode 100644 index 0000000..10409d2 --- /dev/null +++ b/libpixelflinger/codeflinger/x86/libenc/enc_defs.h @@ -0,0 +1,786 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @author Alexander V. Astapchuk + */ +#ifndef _ENCODER_DEFS_H_ +#define _ENCODER_DEFS_H_ + + +// Used to isolate experimental or being tuned encoder into a separate +// namespace so it can coexist with a stable one in the same bundle. +#ifdef ENCODER_ISOLATE + #define ENCODER_NAMESPACE_START namespace enc_ia32 { + #define ENCODER_NAMESPACE_END }; +#else + #define ENCODER_NAMESPACE_START + #define ENCODER_NAMESPACE_END +#endif + +#include <assert.h> +#include "enc_defs_ext.h" + +#ifndef COUNTOF + /** + * Number of items in an array. + */ + #define COUNTOF(a) (sizeof(a)/sizeof(a[0])) +#endif + +#ifdef _EM64T_ + /** + * A stack pointer of default platform's size. + */ + #define REG_STACK RegName_RSP + /** + * A max GP register (with a highest index number) + */ + #define REG_MAX RegName_R15 + /** + * Total number of GP registers including stack pointer. + */ + #define MAX_REGS 15 +#else + #define REG_STACK RegName_ESP + #define REG_MAX RegName_EDI + #define MAX_REGS 8 +#endif + +ENCODER_NAMESPACE_START + +/** + * A number of bytes 'eaten' by an ordinary PUSH/POP. + */ +#define STACK_SLOT_SIZE (sizeof(void*)) + + +/** + * A recommended by Intel Arch Manual aligment for instructions that + * are targets for jmps. + */ +#define JMP_TARGET_ALIGMENT (16) +/** + * A maximum possible size of native instruction. 
+ */ +#define MAX_NATIVE_INST_SIZE (15) +/** + * The enum OpndKind describes an operand's location - memory, immediate or a register. + * It can be used as a bit mask. + */ +typedef enum OpndKind { + /** + * A change must be balanced with at least the following places: + * Ia32::Constraint-s use the OpndKind as a mask + * encoder.cpp & encoder_master_info.cpp uses OpndKind as an index for hashing + * - perhaps there are much more places + * + * NOTE: an MMXReg kind is incompatible with the current constraints framework, + * as it's not encoded as a mask. + */ + OpndKind_Null=0, + OpndKind_GPReg = 0x01, OpndKind_MinRegKind = OpndKind_GPReg, + OpndKind_SReg = 0x02, +#ifdef _HAVE_MMX_ + OpndKind_MMXReg = 0x03, +#endif + OpndKind_FPReg = 0x04, + OpndKind_XMMReg = 0x08, + OpndKind_OtherReg = 0x10, + OpndKind_StatusReg = OpndKind_OtherReg, + OpndKind_MaxRegKind = OpndKind_StatusReg, // a max existing kind of register + OpndKind_MaxReg, // -'- + 1 to be used in array defs + // + OpndKind_Immediate = 0x20, OpndKind_Imm=OpndKind_Immediate, + OpndKind_Memory = 0x40, OpndKind_Mem=OpndKind_Memory, + // + OpndKind_Reg = 0x1F, + OpndKind_Any = 0x7F, + // syntetic constants. Normally not used anywhere, but are used for + // human-readable showing under the debugger + OpndKind_GPReg_Mem = OpndKind_GPReg|OpndKind_Mem, +#ifdef _HAVE_MMX_ + OpndKind_MMXReg_Mem = OpndKind_MMXReg|OpndKind_Mem, +#endif + OpndKind_XMMReg_Mem = OpndKind_XMMReg|OpndKind_Mem, +} OpndKind; + +/** + * Defines type of extention allowed for particular operand. + * For example imul r32,r_m32,imm8 sign extend imm8 before performing multiplication. + * To satisfy instruction constraints immediate operand should be either OpndExt_Signed + * or OpndExt_Any. + */ +typedef enum OpndExt { + OpndExt_None = 0x0, + OpndExt_Signed = 0x1, + OpndExt_Zero = 0x2, + OpndExt_Any = 0x3, +}OpndExt; + +/** + * enum OpndRole defines the role of an operand in an instruction + * Can be used as mask to combine def and use. 
The complete def+use + * info can be combined in 2 bits which is used, say in Encoder::OpndRole. + */ +//TODO: this duplicates an Role used in the Ia32::Inst. That duplicate enum should be removed. +typedef enum OpndRole { + OpndRole_Null=0, + OpndRole_Use=0x1, + OpndRole_Def=0x2, + OpndRole_UseDef=OpndRole_Use|OpndRole_Def, + OpndRole_All=0xffff, +} OpndRole; + + +#define REGNAME(k,s,i) ( ((k & OpndKind_Any)<<24) | ((s & OpndSize_Any)<<16) | (i&0xFF) ) + +// Gregory - +// It is critical that all register indexes (3rd number) inside of the +// following table go in ascending order. That is R8 goes after +// RDI. It is necessary for decoder when extending registers from RAX-RDI +// to R8-R15 by simply adding 8 to the index on EM64T architecture +typedef enum RegName { + + RegName_Null = 0, + +#ifdef _EM64T_ + /* + An index part of the RegName-s for RAX-RDI, EAX-ESI, AX-SI and AL-BH is + the same as the index used during instructions encoding. The same rule + applies for XMM regsters for IA32. + For new EM64T registers (both GP and XMM) the index need to be corrected to + obtain the index used in processor's instructions. 
+ */ + RegName_RAX = REGNAME(OpndKind_GPReg,OpndSize_64,0), + RegName_RCX = REGNAME(OpndKind_GPReg,OpndSize_64,1), + RegName_RDX = REGNAME(OpndKind_GPReg,OpndSize_64,2), + RegName_RBX = REGNAME(OpndKind_GPReg,OpndSize_64,3), + RegName_RSP = REGNAME(OpndKind_GPReg,OpndSize_64,4), + RegName_RBP = REGNAME(OpndKind_GPReg,OpndSize_64,5), + RegName_RSI = REGNAME(OpndKind_GPReg,OpndSize_64,6), + RegName_RDI = REGNAME(OpndKind_GPReg,OpndSize_64,7), + + RegName_R8 = REGNAME(OpndKind_GPReg,OpndSize_64,8), + RegName_R9 = REGNAME(OpndKind_GPReg,OpndSize_64,9), + RegName_R10 = REGNAME(OpndKind_GPReg,OpndSize_64,10), + RegName_R11 = REGNAME(OpndKind_GPReg,OpndSize_64,11), + RegName_R12 = REGNAME(OpndKind_GPReg,OpndSize_64,12), + RegName_R13 = REGNAME(OpndKind_GPReg,OpndSize_64,13), + RegName_R14 = REGNAME(OpndKind_GPReg,OpndSize_64,14), + RegName_R15 = REGNAME(OpndKind_GPReg,OpndSize_64,15), +#endif //~_EM64T_ + + RegName_EAX=REGNAME(OpndKind_GPReg,OpndSize_32,0), + RegName_ECX=REGNAME(OpndKind_GPReg,OpndSize_32,1), + RegName_EDX=REGNAME(OpndKind_GPReg,OpndSize_32,2), + RegName_EBX=REGNAME(OpndKind_GPReg,OpndSize_32,3), + RegName_ESP=REGNAME(OpndKind_GPReg,OpndSize_32,4), + RegName_EBP=REGNAME(OpndKind_GPReg,OpndSize_32,5), + RegName_ESI=REGNAME(OpndKind_GPReg,OpndSize_32,6), + RegName_EDI=REGNAME(OpndKind_GPReg,OpndSize_32,7), + +#ifdef _EM64T_ + RegName_R8D = REGNAME(OpndKind_GPReg,OpndSize_32,8), + RegName_R9D = REGNAME(OpndKind_GPReg,OpndSize_32,9), + RegName_R10D = REGNAME(OpndKind_GPReg,OpndSize_32,10), + RegName_R11D = REGNAME(OpndKind_GPReg,OpndSize_32,11), + RegName_R12D = REGNAME(OpndKind_GPReg,OpndSize_32,12), + RegName_R13D = REGNAME(OpndKind_GPReg,OpndSize_32,13), + RegName_R14D = REGNAME(OpndKind_GPReg,OpndSize_32,14), + RegName_R15D = REGNAME(OpndKind_GPReg,OpndSize_32,15), +#endif //~_EM64T_ + + RegName_AX=REGNAME(OpndKind_GPReg,OpndSize_16,0), + RegName_CX=REGNAME(OpndKind_GPReg,OpndSize_16,1), + RegName_DX=REGNAME(OpndKind_GPReg,OpndSize_16,2), + 
RegName_BX=REGNAME(OpndKind_GPReg,OpndSize_16,3), + RegName_SP=REGNAME(OpndKind_GPReg,OpndSize_16,4), + RegName_BP=REGNAME(OpndKind_GPReg,OpndSize_16,5), + RegName_SI=REGNAME(OpndKind_GPReg,OpndSize_16,6), + RegName_DI=REGNAME(OpndKind_GPReg,OpndSize_16,7), + +#ifdef _EM64T_ + RegName_R8S = REGNAME(OpndKind_GPReg,OpndSize_16,8), + RegName_R9S = REGNAME(OpndKind_GPReg,OpndSize_16,9), + RegName_R10S = REGNAME(OpndKind_GPReg,OpndSize_16,10), + RegName_R11S = REGNAME(OpndKind_GPReg,OpndSize_16,11), + RegName_R12S = REGNAME(OpndKind_GPReg,OpndSize_16,12), + RegName_R13S = REGNAME(OpndKind_GPReg,OpndSize_16,13), + RegName_R14S = REGNAME(OpndKind_GPReg,OpndSize_16,14), + RegName_R15S = REGNAME(OpndKind_GPReg,OpndSize_16,15), +#endif //~_EM64T_ + + RegName_AL=REGNAME(OpndKind_GPReg,OpndSize_8,0), + RegName_CL=REGNAME(OpndKind_GPReg,OpndSize_8,1), + RegName_DL=REGNAME(OpndKind_GPReg,OpndSize_8,2), + RegName_BL=REGNAME(OpndKind_GPReg,OpndSize_8,3), + // FIXME: Used in enc_tabl.cpp + // AH is not accessible on EM64T, instead encoded register is SPL, so decoded + // register will return incorrect enum + RegName_AH=REGNAME(OpndKind_GPReg,OpndSize_8,4), +#if !defined(_EM64T_) + RegName_CH=REGNAME(OpndKind_GPReg,OpndSize_8,5), + RegName_DH=REGNAME(OpndKind_GPReg,OpndSize_8,6), + RegName_BH=REGNAME(OpndKind_GPReg,OpndSize_8,7), +#else + RegName_SPL=REGNAME(OpndKind_GPReg,OpndSize_8,4), + RegName_BPL=REGNAME(OpndKind_GPReg,OpndSize_8,5), + RegName_SIL=REGNAME(OpndKind_GPReg,OpndSize_8,6), + RegName_DIL=REGNAME(OpndKind_GPReg,OpndSize_8,7), + RegName_R8L=REGNAME(OpndKind_GPReg,OpndSize_8,8), + RegName_R9L=REGNAME(OpndKind_GPReg,OpndSize_8,9), + RegName_R10L=REGNAME(OpndKind_GPReg,OpndSize_8,10), + RegName_R11L=REGNAME(OpndKind_GPReg,OpndSize_8,11), + RegName_R12L=REGNAME(OpndKind_GPReg,OpndSize_8,12), + RegName_R13L=REGNAME(OpndKind_GPReg,OpndSize_8,13), + RegName_R14L=REGNAME(OpndKind_GPReg,OpndSize_8,14), + RegName_R15L=REGNAME(OpndKind_GPReg,OpndSize_8,15), +#endif + + 
RegName_ES=REGNAME(OpndKind_SReg,OpndSize_16,0), + RegName_CS=REGNAME(OpndKind_SReg,OpndSize_16,1), + RegName_SS=REGNAME(OpndKind_SReg,OpndSize_16,2), + RegName_DS=REGNAME(OpndKind_SReg,OpndSize_16,3), + RegName_FS=REGNAME(OpndKind_SReg,OpndSize_16,4), + RegName_GS=REGNAME(OpndKind_SReg,OpndSize_16,5), + + RegName_EFLAGS=REGNAME(OpndKind_StatusReg,OpndSize_32,0), + +#if !defined(TESTING_ENCODER) + RegName_FP0=REGNAME(OpndKind_FPReg,OpndSize_80,0), + RegName_FP1=REGNAME(OpndKind_FPReg,OpndSize_80,1), + RegName_FP2=REGNAME(OpndKind_FPReg,OpndSize_80,2), + RegName_FP3=REGNAME(OpndKind_FPReg,OpndSize_80,3), + RegName_FP4=REGNAME(OpndKind_FPReg,OpndSize_80,4), + RegName_FP5=REGNAME(OpndKind_FPReg,OpndSize_80,5), + RegName_FP6=REGNAME(OpndKind_FPReg,OpndSize_80,6), + RegName_FP7=REGNAME(OpndKind_FPReg,OpndSize_80,7), +#endif + RegName_FP0S=REGNAME(OpndKind_FPReg,OpndSize_32,0), + RegName_FP1S=REGNAME(OpndKind_FPReg,OpndSize_32,1), + RegName_FP2S=REGNAME(OpndKind_FPReg,OpndSize_32,2), + RegName_FP3S=REGNAME(OpndKind_FPReg,OpndSize_32,3), + RegName_FP4S=REGNAME(OpndKind_FPReg,OpndSize_32,4), + RegName_FP5S=REGNAME(OpndKind_FPReg,OpndSize_32,5), + RegName_FP6S=REGNAME(OpndKind_FPReg,OpndSize_32,6), + RegName_FP7S=REGNAME(OpndKind_FPReg,OpndSize_32,7), + + RegName_FP0D=REGNAME(OpndKind_FPReg,OpndSize_64,0), + RegName_FP1D=REGNAME(OpndKind_FPReg,OpndSize_64,1), + RegName_FP2D=REGNAME(OpndKind_FPReg,OpndSize_64,2), + RegName_FP3D=REGNAME(OpndKind_FPReg,OpndSize_64,3), + RegName_FP4D=REGNAME(OpndKind_FPReg,OpndSize_64,4), + RegName_FP5D=REGNAME(OpndKind_FPReg,OpndSize_64,5), + RegName_FP6D=REGNAME(OpndKind_FPReg,OpndSize_64,6), + RegName_FP7D=REGNAME(OpndKind_FPReg,OpndSize_64,7), + +#if !defined(TESTING_ENCODER) + RegName_XMM0=REGNAME(OpndKind_XMMReg,OpndSize_128,0), + RegName_XMM1=REGNAME(OpndKind_XMMReg,OpndSize_128,1), + RegName_XMM2=REGNAME(OpndKind_XMMReg,OpndSize_128,2), + RegName_XMM3=REGNAME(OpndKind_XMMReg,OpndSize_128,3), + 
RegName_XMM4=REGNAME(OpndKind_XMMReg,OpndSize_128,4), + RegName_XMM5=REGNAME(OpndKind_XMMReg,OpndSize_128,5), + RegName_XMM6=REGNAME(OpndKind_XMMReg,OpndSize_128,6), + RegName_XMM7=REGNAME(OpndKind_XMMReg,OpndSize_128,7), + +#ifdef _EM64T_ + RegName_XMM8 = REGNAME(OpndKind_XMMReg,OpndSize_128,0), + RegName_XMM9 = REGNAME(OpndKind_XMMReg,OpndSize_128,1), + RegName_XMM10 = REGNAME(OpndKind_XMMReg,OpndSize_128,2), + RegName_XMM11 = REGNAME(OpndKind_XMMReg,OpndSize_128,3), + RegName_XMM12 = REGNAME(OpndKind_XMMReg,OpndSize_128,4), + RegName_XMM13 = REGNAME(OpndKind_XMMReg,OpndSize_128,5), + RegName_XMM14 = REGNAME(OpndKind_XMMReg,OpndSize_128,6), + RegName_XMM15 = REGNAME(OpndKind_XMMReg,OpndSize_128,7), +#endif //~_EM64T_ + +#endif // ~TESTING_ENCODER + + RegName_XMM0S=REGNAME(OpndKind_XMMReg,OpndSize_32,0), + RegName_XMM1S=REGNAME(OpndKind_XMMReg,OpndSize_32,1), + RegName_XMM2S=REGNAME(OpndKind_XMMReg,OpndSize_32,2), + RegName_XMM3S=REGNAME(OpndKind_XMMReg,OpndSize_32,3), + RegName_XMM4S=REGNAME(OpndKind_XMMReg,OpndSize_32,4), + RegName_XMM5S=REGNAME(OpndKind_XMMReg,OpndSize_32,5), + RegName_XMM6S=REGNAME(OpndKind_XMMReg,OpndSize_32,6), + RegName_XMM7S=REGNAME(OpndKind_XMMReg,OpndSize_32,7), +#ifdef _EM64T_ + RegName_XMM8S=REGNAME(OpndKind_XMMReg,OpndSize_32,8), + RegName_XMM9S=REGNAME(OpndKind_XMMReg,OpndSize_32,9), + RegName_XMM10S=REGNAME(OpndKind_XMMReg,OpndSize_32,10), + RegName_XMM11S=REGNAME(OpndKind_XMMReg,OpndSize_32,11), + RegName_XMM12S=REGNAME(OpndKind_XMMReg,OpndSize_32,12), + RegName_XMM13S=REGNAME(OpndKind_XMMReg,OpndSize_32,13), + RegName_XMM14S=REGNAME(OpndKind_XMMReg,OpndSize_32,14), + RegName_XMM15S=REGNAME(OpndKind_XMMReg,OpndSize_32,15), +#endif // ifdef _EM64T_ + RegName_XMM0D=REGNAME(OpndKind_XMMReg,OpndSize_64,0), + RegName_XMM1D=REGNAME(OpndKind_XMMReg,OpndSize_64,1), + RegName_XMM2D=REGNAME(OpndKind_XMMReg,OpndSize_64,2), + RegName_XMM3D=REGNAME(OpndKind_XMMReg,OpndSize_64,3), + RegName_XMM4D=REGNAME(OpndKind_XMMReg,OpndSize_64,4), + 
RegName_XMM5D=REGNAME(OpndKind_XMMReg,OpndSize_64,5), + RegName_XMM6D=REGNAME(OpndKind_XMMReg,OpndSize_64,6), + RegName_XMM7D=REGNAME(OpndKind_XMMReg,OpndSize_64,7), +#ifdef _EM64T_ + RegName_XMM8D=REGNAME(OpndKind_XMMReg,OpndSize_64,8), + RegName_XMM9D=REGNAME(OpndKind_XMMReg,OpndSize_64,9), + RegName_XMM10D=REGNAME(OpndKind_XMMReg,OpndSize_64,10), + RegName_XMM11D=REGNAME(OpndKind_XMMReg,OpndSize_64,11), + RegName_XMM12D=REGNAME(OpndKind_XMMReg,OpndSize_64,12), + RegName_XMM13D=REGNAME(OpndKind_XMMReg,OpndSize_64,13), + RegName_XMM14D=REGNAME(OpndKind_XMMReg,OpndSize_64,14), + RegName_XMM15D=REGNAME(OpndKind_XMMReg,OpndSize_64,15), +#endif // ifdef _EM64T_ +#ifdef _HAVE_MMX_ + RegName_MMX0=REGNAME(OpndKind_MMXReg,OpndSize_64,0), + RegName_MMX1=REGNAME(OpndKind_MMXReg,OpndSize_64,1), + RegName_MMX2=REGNAME(OpndKind_MMXReg,OpndSize_64,2), + RegName_MMX3=REGNAME(OpndKind_MMXReg,OpndSize_64,3), + RegName_MMX4=REGNAME(OpndKind_MMXReg,OpndSize_64,4), + RegName_MMX5=REGNAME(OpndKind_MMXReg,OpndSize_64,5), + RegName_MMX6=REGNAME(OpndKind_MMXReg,OpndSize_64,6), + RegName_MMX7=REGNAME(OpndKind_MMXReg,OpndSize_64,7), +#endif // _HAVE_MMX_ +} RegName; + +#if 0 // Android x86: use mnemonics defined in enc_defs_ext.h +/** + * Conditional mnemonics. + * The values match the 'real' (==processor's) values of the appropriate + * condition values used in the opcodes. 
+ */ +enum ConditionMnemonic { + + ConditionMnemonic_O=0, + ConditionMnemonic_NO=1, + ConditionMnemonic_B=2, ConditionMnemonic_NAE=ConditionMnemonic_B, ConditionMnemonic_C=ConditionMnemonic_B, + ConditionMnemonic_NB=3, ConditionMnemonic_AE=ConditionMnemonic_NB, ConditionMnemonic_NC=ConditionMnemonic_NB, + ConditionMnemonic_Z=4, ConditionMnemonic_E=ConditionMnemonic_Z, + ConditionMnemonic_NZ=5, ConditionMnemonic_NE=ConditionMnemonic_NZ, + ConditionMnemonic_BE=6, ConditionMnemonic_NA=ConditionMnemonic_BE, + ConditionMnemonic_NBE=7, ConditionMnemonic_A=ConditionMnemonic_NBE, + + ConditionMnemonic_S=8, + ConditionMnemonic_NS=9, + ConditionMnemonic_P=10, ConditionMnemonic_PE=ConditionMnemonic_P, + ConditionMnemonic_NP=11, ConditionMnemonic_PO=ConditionMnemonic_NP, + ConditionMnemonic_L=12, ConditionMnemonic_NGE=ConditionMnemonic_L, + ConditionMnemonic_NL=13, ConditionMnemonic_GE=ConditionMnemonic_NL, + ConditionMnemonic_LE=14, ConditionMnemonic_NG=ConditionMnemonic_LE, + ConditionMnemonic_NLE=15, ConditionMnemonic_G=ConditionMnemonic_NLE, + ConditionMnemonic_Count=16 +}; + + +#define CCM(prefix,cond) Mnemonic_##prefix##cond=Mnemonic_##prefix##cc+ConditionMnemonic_##cond + +//========================================================================================================= +enum Mnemonic { + +Mnemonic_NULL=0, Mnemonic_Null=Mnemonic_NULL, +Mnemonic_ADC, // Add with Carry +Mnemonic_ADD, // Add +Mnemonic_ADDSD, // Add Scalar Double-Precision Floating-Point Values +Mnemonic_ADDSS, // Add Scalar Single-Precision Floating-Point Values +Mnemonic_AND, // Logical AND + +Mnemonic_BSF, // Bit scan forward +Mnemonic_BSR, // Bit scan reverse + +Mnemonic_CALL, // Call Procedure +Mnemonic_CMC, // Complement Carry Flag +Mnemonic_CWD, Mnemonic_CDQ=Mnemonic_CWD,// Convert Word to Doubleword/Convert Doubleword to Qua T dword +Mnemonic_CMOVcc, // Conditional Move + CCM(CMOV,O), + CCM(CMOV,NO), + CCM(CMOV,B), CCM(CMOV,NAE), CCM(CMOV,C), + CCM(CMOV,NB), CCM(CMOV,AE), CCM(CMOV,NC), + 
CCM(CMOV,Z), CCM(CMOV,E), + CCM(CMOV,NZ), CCM(CMOV,NE), + CCM(CMOV,BE), CCM(CMOV,NA), + CCM(CMOV,NBE), CCM(CMOV,A), + + CCM(CMOV,S), + CCM(CMOV,NS), + CCM(CMOV,P), CCM(CMOV,PE), + CCM(CMOV,NP), CCM(CMOV,PO), + CCM(CMOV,L), CCM(CMOV,NGE), + CCM(CMOV,NL), CCM(CMOV,GE), + CCM(CMOV,LE), CCM(CMOV,NG), + CCM(CMOV,NLE), CCM(CMOV,G), + +Mnemonic_CMP, // Compare Two Operands +Mnemonic_CMPXCHG, // Compare and exchange +Mnemonic_CMPXCHG8B, // Compare and Exchange 8 Bytes +Mnemonic_CMPSB, // Compare Two Bytes at DS:ESI and ES:EDI +Mnemonic_CMPSW, // Compare Two Words at DS:ESI and ES:EDI +Mnemonic_CMPSD, // Compare Two Doublewords at DS:ESI and ES:EDI +// +// double -> float +Mnemonic_CVTSD2SS, // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value +// double -> I_32 +Mnemonic_CVTSD2SI, // Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer +// double [truncated] -> I_32 +Mnemonic_CVTTSD2SI, // Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Doubleword Integer +// +// float -> double +Mnemonic_CVTSS2SD, // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value +// float -> I_32 +Mnemonic_CVTSS2SI, // Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer +// float [truncated] -> I_32 +Mnemonic_CVTTSS2SI, // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer +// +// I_32 -> double +Mnemonic_CVTSI2SD, // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value +// I_32 -> float +Mnemonic_CVTSI2SS, // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value + +Mnemonic_COMISD, // Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS +Mnemonic_COMISS, // Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS +Mnemonic_DEC, // Decrement by 1 +//Mnemonic_DIV, // Unsigned Divide +Mnemonic_DIVSD, // 
Divide Scalar Double-Precision Floating-Point Values +Mnemonic_DIVSS, // Divide Scalar Single-Precision Floating-Point Values + +#ifdef _HAVE_MMX_ +Mnemonic_EMMS, // Empty MMX Technology State +#endif + +Mnemonic_ENTER, // ENTER-Make Stack Frame for Procedure Parameters +Mnemonic_FLDCW, // Load FPU control word +Mnemonic_FADDP, +Mnemonic_FLDZ, +Mnemonic_FADD, +Mnemonic_FSUBP, +Mnemonic_FSUB, +Mnemonic_FISUB, +Mnemonic_FMUL, +Mnemonic_FMULP, +Mnemonic_FDIVP, +Mnemonic_FDIV, +Mnemonic_FUCOMPP, +Mnemonic_FRNDINT, +Mnemonic_FNSTCW, // Store FPU control word +Mnemonic_FSTSW, // Store FPU status word +Mnemonic_FNSTSW, // Store FPU status word +//Mnemonic_FDECSTP, // Decrement Stack-Top Pointer +Mnemonic_FILD, // Load Integer +Mnemonic_FLD, // Load Floating Point Value +Mnemonic_FLDLG2, +Mnemonic_FLDLN2, +Mnemonic_FLD1, + +Mnemonic_FCLEX, // Clear Exceptions +Mnemonic_FCHS, // Change sign of ST0 +Mnemonic_FNCLEX, // Clear Exceptions + +//Mnemonic_FINCSTP, // Increment Stack-Top Pointer +Mnemonic_FIST, // Store Integer +Mnemonic_FISTP, // Store Integer, pop FPU stack +Mnemonic_FISTTP, // Store Integer with Truncation +Mnemonic_FPREM, // Partial Remainder +Mnemonic_FPREM1, // Partial Remainder +Mnemonic_FST, // Store Floating Point Value +Mnemonic_FSTP, // Store Floating Point Value and pop the FP stack +Mnemonic_FSQRT, //Computes the square root of the source value in the stack and pop the FP stack +Mnemonic_FABS, //Computes the absolute value of the source value in the stack and pop the FP stack +Mnemonic_FSIN, //Computes the sine of the source value in the stack and pop the FP stack +Mnemonic_FCOS, //Computes the cosine of the source value in the stack and pop the FP stack +Mnemonic_FPTAN, //Computes the tangent of the source value in the stack and pop the FP stack +Mnemonic_FYL2X, +Mnemonic_FYL2XP1, +Mnemonic_F2XM1, +Mnemonic_FPATAN, +Mnemonic_FXCH, +Mnemonic_FSCALE, + +Mnemonic_XCHG, +Mnemonic_DIV, // Unsigned Divide +Mnemonic_IDIV, // Signed Divide +Mnemonic_MUL, // 
Unsigned Multiply +Mnemonic_IMUL, // Signed Multiply +Mnemonic_INC, // Increment by 1 +Mnemonic_INT3, // Call break point +Mnemonic_Jcc, // Jump if Condition Is Met + CCM(J,O), + CCM(J,NO), + CCM(J,B), CCM(J,NAE), CCM(J,C), + CCM(J,NB), CCM(J,AE), CCM(J,NC), + CCM(J,Z), CCM(J,E), + CCM(J,NZ), CCM(J,NE), + CCM(J,BE), CCM(J,NA), + CCM(J,NBE), CCM(J,A), + CCM(J,S), + CCM(J,NS), + CCM(J,P), CCM(J,PE), + CCM(J,NP), CCM(J,PO), + CCM(J,L), CCM(J,NGE), + CCM(J,NL), CCM(J,GE), + CCM(J,LE), CCM(J,NG), + CCM(J,NLE), CCM(J,G), +Mnemonic_JMP, // Jump +Mnemonic_LEA, // Load Effective Address +Mnemonic_LEAVE, // High Level Procedure Exit +Mnemonic_LOOP, // Loop according to ECX counter +Mnemonic_LOOPE, // Loop according to ECX counter +Mnemonic_LOOPNE, Mnemonic_LOOPNZ = Mnemonic_LOOPNE, // Loop according to ECX +Mnemonic_LAHF, // Load Flags into AH +Mnemonic_MOV, // Move +Mnemonic_MOVD, // Move Double word +Mnemonic_MOVQ, // Move Quadword +/*Mnemonic_MOVS, // Move Data from String to String*/ +// MOVS is a special case: see encoding table for more details, +Mnemonic_MOVS8, Mnemonic_MOVS16, Mnemonic_MOVS32, Mnemonic_MOVS64, +// +Mnemonic_MOVAPD, // Move Scalar Double-Precision Floating-Point Value +Mnemonic_MOVSD, // Move Scalar Double-Precision Floating-Point Value +Mnemonic_MOVSS, // Move Scalar Single-Precision Floating-Point Values +Mnemonic_MOVSX, // Move with Sign-Extension +Mnemonic_MOVZX, // Move with Zero-Extend +//Mnemonic_MUL, // Unsigned Multiply +Mnemonic_MULSD, // Multiply Scalar Double-Precision Floating-Point Values +Mnemonic_MULSS, // Multiply Scalar Single-Precision Floating-Point Values +Mnemonic_NEG, // Two's Complement Negation +Mnemonic_NOP, // No Operation +Mnemonic_NOT, // One's Complement Negation +Mnemonic_OR, // Logical Inclusive OR +Mnemonic_PREFETCH, // prefetch + +#ifdef _HAVE_MMX_ + Mnemonic_PADDQ, // Add Packed Quadword Integers + Mnemonic_PAND, // Logical AND + Mnemonic_POR, // Bitwise Logical OR + Mnemonic_PSUBQ, // Subtract Packed Quadword 
Integers +#endif + +Mnemonic_PXOR, // Logical Exclusive OR +Mnemonic_POP, // Pop a Value from the Stack +Mnemonic_POPFD, // Pop a Value of EFLAGS register from the Stack +Mnemonic_PUSH, // Push Word or Doubleword Onto the Stack +Mnemonic_PUSHFD, // Push EFLAGS Doubleword Onto the Stack +Mnemonic_RET, // Return from Procedure + +Mnemonic_SETcc, // Set Byte on Condition + CCM(SET,O), + CCM(SET,NO), + CCM(SET,B), CCM(SET,NAE), CCM(SET,C), + CCM(SET,NB), CCM(SET,AE), CCM(SET,NC), + CCM(SET,Z), CCM(SET,E), + CCM(SET,NZ), CCM(SET,NE), + CCM(SET,BE), CCM(SET,NA), + CCM(SET,NBE), CCM(SET,A), + CCM(SET,S), + CCM(SET,NS), + CCM(SET,P), CCM(SET,PE), + CCM(SET,NP), CCM(SET,PO), + CCM(SET,L), CCM(SET,NGE), + CCM(SET,NL), CCM(SET,GE), + CCM(SET,LE), CCM(SET,NG), + CCM(SET,NLE), CCM(SET,G), + +Mnemonic_SAL, Mnemonic_SHL=Mnemonic_SAL,// Shift left +Mnemonic_SAR, // Shift right +Mnemonic_ROR, // Rotate right +Mnemonic_RCR, // Rotate right through CARRY flag +Mnemonic_ROL, // Rotate left +Mnemonic_RCL, // Rotate left through CARRY flag +Mnemonic_SHR, // Unsigned shift right +Mnemonic_SHRD, // Double Precision Shift Right +Mnemonic_SHLD, // Double Precision Shift Left + +Mnemonic_SBB, // Integer Subtraction with Borrow +Mnemonic_SUB, // Subtract +Mnemonic_SUBSD, // Subtract Scalar Double-Precision Floating-Point Values +Mnemonic_SUBSS, // Subtract Scalar Single-Precision Floating-Point Values + +Mnemonic_TEST, // Logical Compare + +Mnemonic_UCOMISD, // Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS +Mnemonic_UCOMISS, // Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS + +Mnemonic_XOR, // Logical Exclusive OR +// +// packed things, +// +Mnemonic_XORPD, // Bitwise Logical XOR for Double-Precision Floating-Point Values +Mnemonic_XORPS, // Bitwise Logical XOR for Single-Precision Floating-Point Values + +Mnemonic_CVTDQ2PD, // Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values 
+Mnemonic_CVTTPD2DQ, // Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers + +Mnemonic_CVTDQ2PS, // Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values +Mnemonic_CVTTPS2DQ, // Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Doubleword Integers +// +// String operations +// +Mnemonic_STD, // Set direction flag +Mnemonic_CLD, // Clear direction flag +Mnemonic_SCAS, // Scan string +Mnemonic_STOS, // Store string + +// +Mnemonic_WAIT, // Check pending pending unmasked floating-point exception +// +Mnemonic_Count +}; + +#undef CCM +#endif + +/** + * @brief Instruction prefixes, according to arch manual. + */ +typedef enum InstPrefix { + InstPrefix_Null = 0, + // Group 1 + InstPrefix_LOCK = 0xF0, + InstPrefix_REPNE = 0xF2, + InstPrefix_REPNZ = InstPrefix_REPNE, + InstPrefix_REP = 0xF3, InstPrefix_REPZ = InstPrefix_REP, + // Group 2 + InstPrefix_CS = 0x2E, + InstPrefix_SS = 0x36, + InstPrefix_DS = 0x3E, + InstPrefix_ES = 0x26, + InstPrefix_FS = 0x64, + InstPrefix_GS = 0x65, + // + InstPrefix_HintTaken = 0x3E, + InstPrefix_HintNotTaken = 0x2E, + // Group 3 + InstPrefix_OpndSize = 0x66, + // Group 4 + InstPrefix_AddrSize = 0x67 +} InstPrefix; + +inline unsigned getSizeBytes(OpndSize sz) +{ + if (sz==OpndSize_64) { return 8; } + if (sz==OpndSize_32) { return 4; } + if (sz==OpndSize_16) { return 2; } + if (sz==OpndSize_8) { return 1; } + assert(false); + return 0; +} + +inline bool isRegKind(OpndKind kind) +{ + return OpndKind_GPReg<= kind && kind<=OpndKind_MaxRegKind; +} + +/** + * @brief Returns RegName for a given name. + * + * Name is case-insensitive. + * @param regname - string name of a register + * @return RegName for the given name, or RegName_Null if name is invalid + */ +RegName getRegName(const char * regname); +/** + * Constructs RegName from the given OpndKind, size and index. 
+ */ +inline RegName getRegName(OpndKind k, OpndSize s, int idx) +{ + return (RegName)REGNAME(k,s,idx); +} +/** + * Extracts a bit mask with a bit set at the position of the register's index. + */ +inline unsigned getRegMask(RegName reg) +{ + return 1<<(reg&0xff); +} +/** + * @brief Extracts OpndKind from the RegName. + */ +inline OpndKind getRegKind(RegName reg) +{ + return (OpndKind)(reg>>24); +} +/** + * @brief Extracts OpndSize from RegName. + */ +inline OpndSize getRegSize(RegName reg) +{ + return (OpndSize)((reg>>16)&0xFF); +} +/** + * Extracts an index from the given RegName. + */ +inline unsigned char getRegIndex(RegName reg) +{ + return (unsigned char)(reg&0xFF); +} +/** + * Returns a string name of the given RegName. The name returned is in upper-case. + * Returns NULL if invalid RegName specified. + */ +const char * getRegNameString(RegName reg); +/** + * Returns string name of a given OpndSize. + * Returns NULL if invalid OpndSize passed. + */ +const char * getOpndSizeString(OpndSize size); +/** + * Returns OpndSize passed by its string representation (case insensitive). + * Returns OpndSize_Null if invalid string specified. + * The 'sizeString' can not be NULL. + */ +OpndSize getOpndSize(const char * sizeString); +/** + * Returns string name of a given OpndKind. + * Returns NULL if the passed kind is invalid. + */ +const char * getOpndKindString(OpndKind kind); +/** + * Returns OpndKind found by its string representation (case insensitive). + * Returns OpndKind_Null if the name is invalid. + * The 'kindString' can not be NULL. + */ +OpndKind getOpndKind(const char * kindString); +/** + * + */ +const char * getConditionString(ConditionMnemonic cm); + +/** + * Constructs an RegName with the same index and kind, but with a different size from + * the given RegName (i.e. getRegAlias(EAX, OpndSize_16) => AX; getRegAlias(BL, OpndSize_32) => EBX). + * The constructed RegName is not checked in any way and thus may be invalid. 
+ * Note, that the aliasing does not work for at least AH,BH,CH,DH, ESI, EDI, ESP and EBP regs. + */ +inline RegName getAliasReg(RegName reg, OpndSize sz) +{ + return (RegName)REGNAME(getRegKind(reg), sz, getRegIndex(reg)); +} + +/** + * brief Tests two RegName-s of the same kind for equality. + * + * @note Does work for 8 bit general purpose registers (AH, AL, BH, BL, etc). + */ +inline bool equals(RegName r0, RegName r1) +{ + return getRegKind(r0) == getRegKind(r1) && + getRegIndex(r0) == getRegIndex(r1); +} + +ENCODER_NAMESPACE_END + +#endif // ifndef _ENCODER_DEFS_H_ diff --git a/libpixelflinger/codeflinger/x86/libenc/enc_defs_ext.h b/libpixelflinger/codeflinger/x86/libenc/enc_defs_ext.h new file mode 100644 index 0000000..53f6d44 --- /dev/null +++ b/libpixelflinger/codeflinger/x86/libenc/enc_defs_ext.h @@ -0,0 +1,365 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _ENCODER_DEFS_EXT_H_ +#define _ENCODER_DEFS_EXT_H_ + + +// Used to isolate experimental or being tuned encoder into a separate +// namespace so it can coexist with a stable one in the same bundle. 
+#ifdef ENCODER_ISOLATE + #define ENCODER_NAMESPACE_START namespace enc_ia32 { + #define ENCODER_NAMESPACE_END }; +#else + #define ENCODER_NAMESPACE_START + #define ENCODER_NAMESPACE_END +#endif + +ENCODER_NAMESPACE_START +typedef enum OpndSize { + /** + * A change must be balanced with at least the following places: + * Ia32IRConstants.h :: getByteSize() uses some presumptions about OpndSize_ values + * Ia32::Constraint-s use the OpndSize as a mask + * encoder.cpp & encoder_master_info.cpp uses OpndSize as an index for hashing + * - perhaps there are much more places + */ + OpndSize_Null = 0, + OpndSize_8 = 0x01, + OpndSize_16 = 0x02, + OpndSize_32 = 0x04, + OpndSize_64 = 0x08, +#if !defined(TESTING_ENCODER) + OpndSize_80 = 0x10, + OpndSize_128 = 0x20, +#endif + OpndSize_Max, + OpndSize_Any = 0x3F, + OpndSize_Default = OpndSize_Any +} OpndSize; + +/** + * Conditional mnemonics. + * The values match the 'real' (==processor's) values of the appropriate + * condition values used in the opcodes. 
+ */ +typedef enum ConditionMnemonic { + + ConditionMnemonic_O=0, + ConditionMnemonic_NO=1, + ConditionMnemonic_B=2, ConditionMnemonic_NAE=ConditionMnemonic_B, ConditionMnemonic_C=ConditionMnemonic_B, + ConditionMnemonic_NB=3, ConditionMnemonic_AE=ConditionMnemonic_NB, ConditionMnemonic_NC=ConditionMnemonic_NB, + ConditionMnemonic_Z=4, ConditionMnemonic_E=ConditionMnemonic_Z, + ConditionMnemonic_NZ=5, ConditionMnemonic_NE=ConditionMnemonic_NZ, + ConditionMnemonic_BE=6, ConditionMnemonic_NA=ConditionMnemonic_BE, + ConditionMnemonic_NBE=7, ConditionMnemonic_A=ConditionMnemonic_NBE, + + ConditionMnemonic_S=8, + ConditionMnemonic_NS=9, + ConditionMnemonic_P=10, ConditionMnemonic_PE=ConditionMnemonic_P, + ConditionMnemonic_NP=11, ConditionMnemonic_PO=ConditionMnemonic_NP, + ConditionMnemonic_L=12, ConditionMnemonic_NGE=ConditionMnemonic_L, + ConditionMnemonic_NL=13, ConditionMnemonic_GE=ConditionMnemonic_NL, + ConditionMnemonic_LE=14, ConditionMnemonic_NG=ConditionMnemonic_LE, + ConditionMnemonic_NLE=15, ConditionMnemonic_G=ConditionMnemonic_NLE, + ConditionMnemonic_Count=16 +} ConditionMnemonic; + + +#define CCM(prefix,cond) Mnemonic_##prefix##cond=Mnemonic_##prefix##cc+ConditionMnemonic_##cond + +//========================================================================================================= +typedef enum Mnemonic { + +Mnemonic_NULL=0, Mnemonic_Null=Mnemonic_NULL, +Mnemonic_JMP, // Jump +Mnemonic_MOV, // Move +Mnemonic_Jcc, // Jump if Condition Is Met + CCM(J,O), + CCM(J,NO), + CCM(J,B), CCM(J,NAE), CCM(J,C), + CCM(J,NB), CCM(J,AE), CCM(J,NC), + CCM(J,Z), CCM(J,E), + CCM(J,NZ), CCM(J,NE), + CCM(J,BE), CCM(J,NA), + CCM(J,NBE), CCM(J,A), + CCM(J,S), + CCM(J,NS), + CCM(J,P), CCM(J,PE), + CCM(J,NP), CCM(J,PO), + CCM(J,L), CCM(J,NGE), + CCM(J,NL), CCM(J,GE), + CCM(J,LE), CCM(J,NG), + CCM(J,NLE), CCM(J,G), +Mnemonic_CALL, // Call Procedure + +Mnemonic_ADC, // Add with Carry +Mnemonic_ADD, // Add +Mnemonic_ADDSD, // Add Scalar Double-Precision Floating-Point 
Values +Mnemonic_ADDSS, // Add Scalar Single-Precision Floating-Point Values +Mnemonic_AND, // Logical AND + +Mnemonic_BSF, // Bit scan forward +Mnemonic_BSR, // Bit scan reverse + +Mnemonic_CMC, // Complement Carry Flag +Mnemonic_CWD, Mnemonic_CDQ=Mnemonic_CWD,// Convert Word to Doubleword/Convert Doubleword to Qua T dword +Mnemonic_CMOVcc, // Conditional Move + CCM(CMOV,O), + CCM(CMOV,NO), + CCM(CMOV,B), CCM(CMOV,NAE), CCM(CMOV,C), + CCM(CMOV,NB), CCM(CMOV,AE), CCM(CMOV,NC), + CCM(CMOV,Z), CCM(CMOV,E), + CCM(CMOV,NZ), CCM(CMOV,NE), + CCM(CMOV,BE), CCM(CMOV,NA), + CCM(CMOV,NBE), CCM(CMOV,A), + + CCM(CMOV,S), + CCM(CMOV,NS), + CCM(CMOV,P), CCM(CMOV,PE), + CCM(CMOV,NP), CCM(CMOV,PO), + CCM(CMOV,L), CCM(CMOV,NGE), + CCM(CMOV,NL), CCM(CMOV,GE), + CCM(CMOV,LE), CCM(CMOV,NG), + CCM(CMOV,NLE), CCM(CMOV,G), + +Mnemonic_CMP, // Compare Two Operands +Mnemonic_CMPXCHG, // Compare and exchange +Mnemonic_CMPXCHG8B, // Compare and Exchange 8 Bytes +Mnemonic_CMPSB, // Compare Two Bytes at DS:ESI and ES:EDI +Mnemonic_CMPSW, // Compare Two Words at DS:ESI and ES:EDI +Mnemonic_CMPSD, // Compare Two Doublewords at DS:ESI and ES:EDI +// +// double -> float +Mnemonic_CVTSD2SS, // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value +// double -> I_32 +Mnemonic_CVTSD2SI, // Convert Scalar Double-Precision Floating-Point Value to Doubleword Integer +// double [truncated] -> I_32 +Mnemonic_CVTTSD2SI, // Convert with Truncation Scalar Double-Precision Floating-Point Value to Signed Doubleword Integer +// +// float -> double +Mnemonic_CVTSS2SD, // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value +// float -> I_32 +Mnemonic_CVTSS2SI, // Convert Scalar Single-Precision Floating-Point Value to Doubleword Integer +// float [truncated] -> I_32 +Mnemonic_CVTTSS2SI, // Convert with Truncation Scalar Single-Precision Floating-Point Value to Doubleword Integer +// +// I_32 -> double 
+Mnemonic_CVTSI2SD, // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value +// I_32 -> float +Mnemonic_CVTSI2SS, // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value + +Mnemonic_COMISD, // Compare Scalar Ordered Double-Precision Floating-Point Values and Set EFLAGS +Mnemonic_COMISS, // Compare Scalar Ordered Single-Precision Floating-Point Values and Set EFLAGS +Mnemonic_DEC, // Decrement by 1 +Mnemonic_DIVSD, // Divide Scalar Double-Precision Floating-Point Values +Mnemonic_DIVSS, // Divide Scalar Single-Precision Floating-Point Values +Mnemonic_ENTER, // ENTER-Make Stack Frame for Procedure Parameters +Mnemonic_FLDCW, // Load FPU control word +Mnemonic_FADDP, +Mnemonic_FLDZ, +Mnemonic_FADD, +Mnemonic_FSUBP, +Mnemonic_FSUB, +Mnemonic_FISUB, +Mnemonic_FMUL, +Mnemonic_FMULP, +Mnemonic_FDIVP, +Mnemonic_FDIV, +Mnemonic_FUCOM, +Mnemonic_FUCOMI, +Mnemonic_FUCOMP, +Mnemonic_FUCOMIP, +Mnemonic_FUCOMPP, +Mnemonic_FRNDINT, +Mnemonic_FNSTCW, // Store FPU control word +Mnemonic_FSTSW, // Store FPU status word +Mnemonic_FNSTSW, // Store FPU status word +Mnemonic_FILD, // Load Integer +Mnemonic_FLD, // Load Floating Point Value +Mnemonic_FLDLG2, +Mnemonic_FLDLN2, +Mnemonic_FLD1, + +Mnemonic_FCLEX, // Clear Exceptions +Mnemonic_FCHS, // Change sign of ST0 +Mnemonic_FNCLEX, // Clear Exceptions +Mnemonic_FIST, // Store Integer +Mnemonic_FISTP, // Store Integer, pop FPU stack +Mnemonic_FISTTP, // Store Integer with Truncation +Mnemonic_FPREM, // Partial Remainder +Mnemonic_FPREM1, // Partial Remainder +Mnemonic_FST, // Store Floating Point Value +Mnemonic_FSTP, // Store Floating Point Value and pop the FP stack +Mnemonic_FSQRT, //Computes the square root of the source value in the stack and pop the FP stack +Mnemonic_FABS, //Computes the absolute value of the source value in the stack and pop the FP stack +Mnemonic_FSIN, //Computes the sine of the source value in the stack and pop the FP stack +Mnemonic_FCOS, //Computes the cosine of 
the source value in the stack and pop the FP stack +Mnemonic_FPTAN, //Computes the tangent of the source value in the stack and pop the FP stack +Mnemonic_FYL2X, +Mnemonic_FYL2XP1, +Mnemonic_F2XM1, +Mnemonic_FPATAN, +Mnemonic_FXCH, +Mnemonic_FSCALE, + +Mnemonic_XCHG, +Mnemonic_DIV, // Unsigned Divide +Mnemonic_IDIV, // Signed Divide +Mnemonic_MUL, // Unsigned Multiply +Mnemonic_IMUL, // Signed Multiply +Mnemonic_INC, // Increment by 1 +Mnemonic_INT3, // Call break point + +Mnemonic_LEA, // Load Effective Address +Mnemonic_LEAVE, // High Level Procedure Exit +Mnemonic_LOOP, // Loop according to ECX counter +Mnemonic_LOOPE, // Loop according to ECX counter +Mnemonic_LOOPNE, Mnemonic_LOOPNZ = Mnemonic_LOOPNE, // Loop according to ECX +Mnemonic_LAHF, // Load Flags into AH +Mnemonic_MOVD, // Move Double word +Mnemonic_MOVQ, // Move Quadword +Mnemonic_MOVS8, +Mnemonic_MOVS16, +Mnemonic_MOVS32, +Mnemonic_MOVS64, +Mnemonic_MOVAPD, // Move Scalar Double-Precision Floating-Point Value +Mnemonic_MOVSD, // Move Scalar Double-Precision Floating-Point Value +Mnemonic_MOVSS, // Move Scalar Single-Precision Floating-Point Values +Mnemonic_MOVSX, // Move with Sign-Extension +Mnemonic_MOVZX, // Move with Zero-Extend +Mnemonic_MULSD, // Multiply Scalar Double-Precision Floating-Point Values +Mnemonic_MULSS, // Multiply Scalar Single-Precision Floating-Point Values +Mnemonic_NEG, // Two's Complement Negation +Mnemonic_NOP, // No Operation +Mnemonic_NOT, // One's Complement Negation +Mnemonic_OR, // Logical Inclusive OR +Mnemonic_PREFETCH, // prefetch +Mnemonic_PADDQ, // Add Packed Quadword Integers +Mnemonic_PAND, // Logical AND +Mnemonic_POR, // Bitwise Logical OR +Mnemonic_PSUBQ, // Subtract Packed Quadword Integers +Mnemonic_PANDN, +Mnemonic_PSLLQ, +Mnemonic_PSRLQ, +Mnemonic_PXOR, // Logical Exclusive OR +Mnemonic_POP, // Pop a Value from the Stack +Mnemonic_POPFD, // Pop a Value of EFLAGS register from the Stack +Mnemonic_PUSH, // Push Word or Doubleword Onto the Stack 
+Mnemonic_PUSHFD, // Push EFLAGS Doubleword Onto the Stack +Mnemonic_RET, // Return from Procedure + +Mnemonic_SETcc, // Set Byte on Condition + CCM(SET,O), + CCM(SET,NO), + CCM(SET,B), CCM(SET,NAE), CCM(SET,C), + CCM(SET,NB), CCM(SET,AE), CCM(SET,NC), + CCM(SET,Z), CCM(SET,E), + CCM(SET,NZ), CCM(SET,NE), + CCM(SET,BE), CCM(SET,NA), + CCM(SET,NBE), CCM(SET,A), + CCM(SET,S), + CCM(SET,NS), + CCM(SET,P), CCM(SET,PE), + CCM(SET,NP), CCM(SET,PO), + CCM(SET,L), CCM(SET,NGE), + CCM(SET,NL), CCM(SET,GE), + CCM(SET,LE), CCM(SET,NG), + CCM(SET,NLE), CCM(SET,G), + +Mnemonic_SAL, Mnemonic_SHL=Mnemonic_SAL,// Shift left +Mnemonic_SAR, // Unsigned shift right +Mnemonic_ROR, // Rotate right +Mnemonic_RCR, // Rotate right through CARRY flag +Mnemonic_ROL, // Rotate left +Mnemonic_RCL, // Rotate left through CARRY flag +Mnemonic_SHR, // Signed shift right +Mnemonic_SHRD, // Double Precision Shift Right +Mnemonic_SHLD, // Double Precision Shift Left + +Mnemonic_SBB, // Integer Subtraction with Borrow +Mnemonic_SUB, // Subtract +Mnemonic_SUBSD, // Subtract Scalar Double-Precision Floating-Point Values +Mnemonic_SUBSS, // Subtract Scalar Single-Precision Floating-Point Values + +Mnemonic_TEST, // Logical Compare + +Mnemonic_UCOMISD, // Unordered Compare Scalar Double-Precision Floating-Point Values and Set EFLAGS +Mnemonic_UCOMISS, // Unordered Compare Scalar Single-Precision Floating-Point Values and Set EFLAGS + +Mnemonic_XOR, // Logical Exclusive OR +// +// packed things, +// +Mnemonic_XORPD, // Bitwise Logical XOR for Double-Precision Floating-Point Values +Mnemonic_XORPS, // Bitwise Logical XOR for Single-Precision Floating-Point Values + +Mnemonic_CVTDQ2PD, // Convert Packed Doubleword Integers to Packed Double-Precision Floating-Point Values +Mnemonic_CVTTPD2DQ, // Convert with Truncation Packed Double-Precision Floating-Point Values to Packed Doubleword Integers + +Mnemonic_CVTDQ2PS, // Convert Packed Doubleword Integers to Packed Single-Precision Floating-Point Values 
+Mnemonic_CVTTPS2DQ, // Convert with Truncation Packed Single-Precision Floating-Point Values to Packed Doubleword Integers +// +// String operations +// +Mnemonic_STD, // Set direction flag +Mnemonic_CLD, // Clear direction flag +Mnemonic_SCAS, // Scan string +Mnemonic_STOS, // Store string + +// +Mnemonic_WAIT, // Check pending pending unmasked floating-point exception +Mnemonic_PADDB, //!< Add packed byte integers +Mnemonic_PADDW, //!< Add packed word integers +Mnemonic_PADDD, //!< Add packed doubleword integers +Mnemonic_PSUBB, //!< Subtract packed byte integers +Mnemonic_PSUBW, //!< Subtract packed word integers +Mnemonic_PSUBD, //!< Subtract packed doubleword integers +Mnemonic_PMULLW, //!< Multiply packed word integers +Mnemonic_PMULLD, //!< Multiply packed doubleword integers +Mnemonic_PSLLW, //!< Shift words left and shift in 0s +Mnemonic_PSLLD, //!< Shift doublewords left and shift in 0s +Mnemonic_PSRAW, //!< Shift words right and shift in sign bits +Mnemonic_PSRAD, //!< Shift doublewords right and shift in sign bits +Mnemonic_PSRLW, //!< Shift words right and shift in 0s +Mnemonic_PSRLD, //!< Shift doublewords right and shift in 0s +Mnemonic_PMOVSXBW, //!< Sign extend 8 packed signed 8-bit integers in the low 8 bytes to 8 packed signed 16-bit integers +Mnemonic_PSHUFB, //!< Shuffle bytes +Mnemonic_PSHUFD, //!< Shuffle doublewords +Mnemonic_PSHUFLW, //!< Shuffle packed low words +Mnemonic_PSHUFHW, //!< Shuffle packed high words +Mnemonic_PHADDSW, //!< Add 16-bit signed integers horizontally, then pack saturated integers +Mnemonic_PHADDW, //!< Add 16-bit signed integers horizontally, then pack +Mnemonic_PHADDD, //!< Add 32-bit signed integers horizontally, then pack +Mnemonic_PHSUBSW, //!< Subtract 16-bit signed integers horizontally, then pack saturated integers +Mnemonic_PHSUBW, //!< Subtract 16-bit signed integers horizontally, then pack +Mnemonic_PHSUBD, //!< Subtract 32-bit signed integers horizontally, then pack +Mnemonic_PEXTRB, //!< Extract a byte 
integer value from xmm +Mnemonic_PEXTRW, //!< Extract a word integer value from xmm +Mnemonic_PEXTRD, //!< Extract a doubleword integer value from xmm +Mnemonic_MOVDQA, //!< Move aligned double quadword +Mnemonic_SHUFPS, //!< Shuffle single words +Mnemonic_MOVAPS, //!< Move aligned single word + +// +Mnemonic_Count +} Mnemonic; + +#undef CCM + +ENCODER_NAMESPACE_END + +#endif // ifndef _ENCODER_DEFS_EXT_H_ diff --git a/libpixelflinger/codeflinger/x86/libenc/enc_prvt.h b/libpixelflinger/codeflinger/x86/libenc/enc_prvt.h new file mode 100644 index 0000000..343b161 --- /dev/null +++ b/libpixelflinger/codeflinger/x86/libenc/enc_prvt.h @@ -0,0 +1,382 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @author Alexander V. Astapchuk + */ +#ifndef __ENC_PRVT_H_INCLUDED__ +#define __ENC_PRVT_H_INCLUDED__ + +#include "enc_base.h" + +ENCODER_NAMESPACE_START +/* + * @file + * @brief Contains some definitions/constants and other stuff used by the + * Encoder internally. 
+ */ + +enum OpcodeByteKind { + //OpcodeByteKind_Opcode = 0x0000, + OpcodeByteKind_ZeroOpcodeByte = 0x0100, + // + // The names _SlashR, _SlahsNum, _ib, _iw, etc + // represent the appropriate abbreviations used + // in the mnemonic descriptions in the Intel's arch manual. + // + OpcodeByteKind_SlashR = 0x0200, + OpcodeByteKind_SlashNum = 0x0300, + OpcodeByteKind_ib = 0x0400, + OpcodeByteKind_iw = 0x0500, + OpcodeByteKind_id = 0x0600, +#ifdef _EM64T_ + OpcodeByteKind_io = 0x0700, +#endif + OpcodeByteKind_cb = 0x0800, + OpcodeByteKind_cw = 0x0900, + OpcodeByteKind_cd = 0x0A00, + //OpcodeByteKind_cp = 0x0B00, + //OpcodeByteKind_co = 0x0C00, + //OpcodeByteKind_ct = 0x0D00, + + OpcodeByteKind_rb = 0x0E00, + OpcodeByteKind_rw = 0x0F00, + OpcodeByteKind_rd = 0x1000, +#ifdef _EM64T_ + OpcodeByteKind_ro = 0x1100, + //OpcodeByteKind_REX = 0x1200, + OpcodeByteKind_REX_W = 0x1300, +#endif + OpcodeByteKind_plus_i = 0x1400, + /** + * a special marker, means 'no opcode on the given position' + * used in opcodes array, to specify the empty slot, say + * to fill an em64t-specific opcode on ia32. + * last 'e' made lowercase to avoid a mess with 'F' in + * OpcodeByteKind_LAST . + */ + OpcodeByteKind_EMPTY = 0xFFFE, + /** + * a special marker, means 'no more opcodes in the array' + * used in in opcodes array to show that there are no more + * opcodes in the array for a given mnemonic. 
+ */ + OpcodeByteKind_LAST = 0xFFFF, + /** + * a mask to extract the OpcodeByteKind + */ + OpcodeByteKind_KindMask = 0xFF00, + /** + * a mask to extract the opcode byte when presented + */ + OpcodeByteKind_OpcodeMask = 0x00FF +}; + +#ifdef USE_ENCODER_DEFINES + +#define N {0, 0, 0, 0 } +#define U {1, 0, 1, OpndRole_Use } +#define D {1, 1, 0, OpndRole_Def } +#define DU {1, 1, 1, OpndRole_Def|OpndRole_Use } + +#define U_U {2, 0, 2, OpndRole_Use<<2 | OpndRole_Use } +#define D_U {2, 1, 1, OpndRole_Def<<2 | OpndRole_Use } +#define D_DU {2, 2, 1, OpndRole_Def<<2 | (OpndRole_Def|OpndRole_Use) } +#define DU_U {2, 1, 2, ((OpndRole_Def|OpndRole_Use)<<2 | OpndRole_Use) } +#define DU_DU {2, 2, 2, ((OpndRole_Def|OpndRole_Use)<<2 | (OpndRole_Def|OpndRole_Use)) } + +#define DU_DU_DU {3, 3, 3, ((OpndRole_Def|OpndRole_Use)<<4) | ((OpndRole_Def|OpndRole_Use)<<2) | (OpndRole_Def|OpndRole_Use) } +#define DU_DU_U {3, 2, 3, (((OpndRole_Def|OpndRole_Use)<<4) | ((OpndRole_Def|OpndRole_Use)<<2) | OpndRole_Use) } +#define D_DU_U {3, 2, 2, (((OpndRole_Def)<<4) | ((OpndRole_Def|OpndRole_Use)<<2) | OpndRole_Use) } +#define D_U_U {3, 1, 2, (((OpndRole_Def)<<4) | ((OpndRole_Use)<<2) | OpndRole_Use) } + +// Special encoding of 0x00 opcode byte. Note: it's all O-s, not zeros. 
+#define OxOO OpcodeByteKind_ZeroOpcodeByte + +#define Size16 InstPrefix_OpndSize + +#define _r OpcodeByteKind_SlashR + +#define _0 OpcodeByteKind_SlashNum|0 +#define _1 OpcodeByteKind_SlashNum|1 +#define _2 OpcodeByteKind_SlashNum|2 +#define _3 OpcodeByteKind_SlashNum|3 +#define _4 OpcodeByteKind_SlashNum|4 +#define _5 OpcodeByteKind_SlashNum|5 +#define _6 OpcodeByteKind_SlashNum|6 +#define _7 OpcodeByteKind_SlashNum|7 + +// '+i' for floating-point instructions +#define _i OpcodeByteKind_plus_i + + +#define ib OpcodeByteKind_ib +#define iw OpcodeByteKind_iw +#define id OpcodeByteKind_id + +#define cb OpcodeByteKind_cb +#define cw OpcodeByteKind_cw +#define cd OpcodeByteKind_cd + +#define rb OpcodeByteKind_rb +#define rw OpcodeByteKind_rw +#define rd OpcodeByteKind_rd + +#define AL {OpndKind_GPReg, OpndSize_8, OpndExt_Any, RegName_AL} +#define AH {OpndKind_GPReg, OpndSize_8, OpndExt_Any, RegName_AH} +#define AX {OpndKind_GPReg, OpndSize_16, OpndExt_Any, RegName_AX} +#define EAX {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_EAX} +#ifdef _EM64T_ + #define RAX {OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RAX } +#endif + +#define CL {OpndKind_GPReg, OpndSize_8, OpndExt_Any, RegName_CL} +#define ECX {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_ECX} +#ifdef _EM64T_ + #define RCX {OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RCX} +#endif + +#define DX {OpndKind_GPReg, OpndSize_16, OpndExt_Any, RegName_DX} +#define EDX {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_EDX} +#ifdef _EM64T_ + #define RDX { OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RDX } +#endif + +#define ESI {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_ESI} +#ifdef _EM64T_ + #define RSI { OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RSI } +#endif + +#define EDI {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_EDI} +#ifdef _EM64T_ + #define RDI { OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_RDI } +#endif + +#define r8 {OpndKind_GPReg, OpndSize_8, OpndExt_Any, 
RegName_Null} +#define r16 {OpndKind_GPReg, OpndSize_16, OpndExt_Any, RegName_Null} +#define r32 {OpndKind_GPReg, OpndSize_32, OpndExt_Any, RegName_Null} +#ifdef _EM64T_ + #define r64 { OpndKind_GPReg, OpndSize_64, OpndExt_Any, RegName_Null } +#endif + +#define r_m8 {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_8, OpndExt_Any, RegName_Null} +#define r_m16 {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_16, OpndExt_Any, RegName_Null} +#define r_m32 {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_32, OpndExt_Any, RegName_Null} + +#define r_m8s {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_8, OpndExt_Signed, RegName_Null} +#define r_m16s {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_16, OpndExt_Signed, RegName_Null} +#define r_m32s {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_32, OpndExt_Signed, RegName_Null} + +#define r_m8u {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_8, OpndExt_Zero, RegName_Null} +#define r_m16u {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_16, OpndExt_Zero, RegName_Null} +#define r_m32u {(OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_32, OpndExt_Zero, RegName_Null} + +//'m' was only used in LEA mnemonic, but is replaced with +// set of exact sizes. See more comments for LEA instruction in TheTable. 
+//#define m {OpndKind_Mem, OpndSize_Null, RegName_Null} +#define m8 {OpndKind_Mem, OpndSize_8, OpndExt_Any, RegName_Null} +#define m16 {OpndKind_Mem, OpndSize_16, OpndExt_Any, RegName_Null} +#define m32 {OpndKind_Mem, OpndSize_32, OpndExt_Any, RegName_Null} +#define m64 {OpndKind_Mem, OpndSize_64, OpndExt_Any, RegName_Null} +#ifdef _EM64T_ + #define r_m64 { (OpndKind)(OpndKind_GPReg|OpndKind_Mem), OpndSize_64, OpndExt_Any, RegName_Null } +#endif + +#define imm8 {OpndKind_Imm, OpndSize_8, OpndExt_Any, RegName_Null} +#define imm16 {OpndKind_Imm, OpndSize_16, OpndExt_Any, RegName_Null} +#define imm32 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null} + +#define imm8s {OpndKind_Imm, OpndSize_8, OpndExt_Signed, RegName_Null} +#define imm16s {OpndKind_Imm, OpndSize_16, OpndExt_Signed, RegName_Null} +#define imm32s {OpndKind_Imm, OpndSize_32, OpndExt_Signed, RegName_Null} + +#define imm8u {OpndKind_Imm, OpndSize_8, OpndExt_Zero, RegName_Null} +#define imm16u {OpndKind_Imm, OpndSize_16, OpndExt_Zero, RegName_Null} +#define imm32u {OpndKind_Imm, OpndSize_32, OpndExt_Zero, RegName_Null} + +#ifdef _EM64T_ + #define imm64 {OpndKind_Imm, OpndSize_64, OpndExt_Any, RegName_Null } +#endif + +//FIXME: moff-s are in fact memory refs, but presented as immediate. +// Need to specify this in OpndDesc. 
+#define moff8 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null} +#define moff16 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null} +#define moff32 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null} +#ifdef _EM64T_ + #define moff64 {OpndKind_Imm, OpndSize_64, OpndExt_Any, RegName_Null} +#endif + + +#define rel8 {OpndKind_Imm, OpndSize_8, OpndExt_Any, RegName_Null} +#define rel16 {OpndKind_Imm, OpndSize_16, OpndExt_Any, RegName_Null} +#define rel32 {OpndKind_Imm, OpndSize_32, OpndExt_Any, RegName_Null} + +#define mm64 {OpndKind_MMXReg, OpndSize_64, OpndExt_Any, RegName_Null} +#define mm_m64 {(OpndKind)(OpndKind_MMXReg|OpndKind_Mem), OpndSize_64, OpndExt_Any, RegName_Null} + +#define xmm64 {OpndKind_XMMReg, OpndSize_64, OpndExt_Any, RegName_Null} +#define xmm_m64 {(OpndKind)(OpndKind_XMMReg|OpndKind_Mem), OpndSize_64, OpndExt_Any, RegName_Null} + +#define xmm32 {OpndKind_XMMReg, OpndSize_32, OpndExt_Any, RegName_Null} +#define xmm_m32 {(OpndKind)(OpndKind_XMMReg|OpndKind_Mem), OpndSize_32, OpndExt_Any, RegName_Null} + +#define FP0S {OpndKind_FPReg, OpndSize_32, OpndExt_Any, RegName_FP0S} +#define FP0D {OpndKind_FPReg, OpndSize_64, OpndExt_Any, RegName_FP0D} +#define FP1S {OpndKind_FPReg, OpndSize_32, OpndExt_Any, RegName_FP1S} +#define FP1D {OpndKind_FPReg, OpndSize_64, OpndExt_Any, RegName_FP1D} +#define fp32 {OpndKind_FPReg, OpndSize_32, OpndExt_Any, RegName_Null} +#define fp64 {OpndKind_FPReg, OpndSize_64, OpndExt_Any, RegName_Null} + +#ifdef _EM64T_ + #define io OpcodeByteKind_io + #define REX_W OpcodeByteKind_REX_W + +#endif + +#endif // USE_ENCODER_DEFINES + +/** + * @brief Represents the REX part of instruction. + */ +struct Rex { + unsigned char b : 1; + unsigned char x : 1; + unsigned char r : 1; + unsigned char w : 1; + unsigned char dummy : 4; // must be '0100'b + unsigned int :24; +}; + +/** + * @brief Describes SIB (scale,index,base) byte. 
+ */ +struct SIB { + unsigned char base:3; + unsigned char index:3; + unsigned char scale:2; + unsigned int padding:24; +}; +/** + * @brief Describes ModRM byte. + */ +struct ModRM +{ + unsigned char rm:3; + unsigned char reg:3; + unsigned char mod:2; + unsigned int padding:24; +}; + + + +/** +* exactly the same as EncoderBase::OpcodeDesc, but also holds info about +* platform on which the opcode is applicable. +*/ +struct OpcodeInfo { + enum platform { + /// an opcode is valid on all platforms + all, + // opcode is valid on IA-32 only + em64t, + // opcode is valid on Intel64 only + ia32, + // opcode is added for the sake of disassembling, should not be used in encoding + decoder, + // only appears in master table, replaced with 'decoder' in hashed version + decoder32, + // only appears in master table, replaced with 'decoder' in hashed version + decoder64, + }; + platform platf; + unsigned opcode[4+1+1]; + EncoderBase::OpndDesc opnds[EncoderBase::MAX_NUM_OPCODE_OPERANDS]; + EncoderBase::OpndRolesDesc roles; +}; + +/** + * @defgroup MF_ Mnemonic flags +*/ + + /** + * Operation has no special properties. + */ +#define MF_NONE (0x00000000) + /** + * Operation affects flags + */ +#define MF_AFFECTS_FLAGS (0x00000001) + /** + * Operation uses flags - conditional operations, ADC/SBB/ETC + */ +#define MF_USES_FLAGS (0x00000002) + /** + * Operation is conditional - MOVcc/SETcc/Jcc/ETC + */ +#define MF_CONDITIONAL (0x00000004) +/** + * Operation is symmetric - its args can be swapped (ADD/MUL/etc). + */ +#define MF_SYMMETRIC (0x00000008) +/** + * Operation is XOR-like - XOR, SUB - operations of 'arg,arg' is pure def, + * without use. + */ +#define MF_SAME_ARG_NO_USE (0x00000010) + +///@} // ~MNF + +/** + * @see same structure as EncoderBase::MnemonicDesc, but carries + * MnemonicInfo::OpcodeInfo[] instead of OpcodeDesc[]. + * Only used during prebuilding the encoding tables, thus it's hidden under + * the appropriate define. 
+ */ +struct MnemonicInfo { + /** + * The mnemonic itself + */ + Mnemonic mn; + /** + * Various characteristics of mnemonic. + * @see MF_ + */ + unsigned flags; + /** + * Number of args/des/uses/roles for the operation. For the operations + * which may use different number of operands (i.e. IMUL/SHL) use the + * most common value, or leave '0' if you are sure this info is not + * required. + */ + EncoderBase::OpndRolesDesc roles; + /** + * Print name of the mnemonic + */ + const char * name; + /** + * Array of opcodes. + * The terminating opcode description always have OpcodeByteKind_LAST + * at the opcodes[i].opcode[0]. + * The size of '25' has nothing behind it, just counted the max + * number of opcodes currently used (MOV instruction). + */ + OpcodeInfo opcodes[25]; +}; + +ENCODER_NAMESPACE_END + +#endif // ~__ENC_PRVT_H_INCLUDED__ diff --git a/libpixelflinger/codeflinger/x86/libenc/enc_tabl.cpp b/libpixelflinger/codeflinger/x86/libenc/enc_tabl.cpp new file mode 100644 index 0000000..b60d6b7 --- /dev/null +++ b/libpixelflinger/codeflinger/x86/libenc/enc_tabl.cpp @@ -0,0 +1,2164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @author Alexander V. 
Astapchuk + */ + + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> //qsort +#include <string.h> +#include <memory.h> +#include <errno.h> +#include <stdlib.h> + + +// need to use EM64T-specifics - new registers, defines from enc_prvt, etc... +#if !defined(_EM64T_) + #define UNDEF_EM64T + #define _EM64T_ +#endif + +#define USE_ENCODER_DEFINES +#include "enc_prvt.h" +#include "enc_defs.h" + +#ifdef UNDEF_EM64T + #undef _EM64T_ +#endif + +//Android x86 +#if 0 //!defined(_HAVE_MMX_) + #define Mnemonic_PADDQ Mnemonic_Null + #define Mnemonic_PAND Mnemonic_Null + #define Mnemonic_POR Mnemonic_Null + #define Mnemonic_PSUBQ Mnemonic_Null +#endif + +ENCODER_NAMESPACE_START + + +EncoderBase::MnemonicDesc EncoderBase::mnemonics[Mnemonic_Count]; +EncoderBase::OpcodeDesc EncoderBase::opcodes[Mnemonic_Count][MAX_OPCODES]; +unsigned char EncoderBase::opcodesHashMap[Mnemonic_Count][HASH_MAX]; + + +/** + * @file + * @brief 'Master' copy of encoding data. + */ + +/* +This file contains a 'master copy' of encoding table - this is the info used +by both generator of native instructions (EncoderBase class) and by +disassembling routines. The first one uses an info how to encode the +instruction, and the second does an opposite - several separate tables are +built at runtime from this main table. + +============================================================================= + +The table was designed for easy support and maintenance. Thus, it was made as +much close as possible to the Intel's IA32 Architecture Manual descriptions. +The info is based on the latest (at the moment of writing) revision which is +June 2005, order number 253666-016. + +Normally, almost all of opcodes in the 'master' table represented exactly as +they are shown in the Intel's Architecture manual (well, with slashes +replaced with underscore). There are several exclusions especially marked. 
+ +Normally, to add an opcode/instruction, one only need to copy the whole +string from the manual, and simply replace '/' with '_'. + +I.e., TheManual reads for DEC: + (1) FE /1 DEC r/m8 Valid Valid Decrement r/m8 by 1. + (2) REX + FE /1 DEC r/m8* Valid N.E. Decrement r/m8 by 1. + (3) REX.W + FF /1 DEC r/m64 Valid N.E. Decrement r/m64 by 1. + +1. Note, that there is no need to explicitly specify REX-based opcodes for + instruction to handle additional registers on EM64T: + + (1) FE /1 DEC r/m8 Valid Valid Decrement r/m8 by 1. + (3) REX.W + FF /1 DEC r/m64 Valid N.E. Decrement r/m64 by 1. + +2. Copy the string, strip off the text comments, replace '/'=>'_'. Note, that + the second line is for EM64T only + + (1) FE /1 DEC r/m8 + (3) REX.W + FF /1 DEC r/m64 + +3. Fill out the mnemonic, opcode parameters parts + + BEGIN_MNEMONIC(DEC, MF_AFFECTS_FLAGS, DU) + BEGIN_OPCODES() + {OpcodeInfo::all, {0xFE, _1}, {r_m8}, DU }, + {OpcodeInfo::em64t, {REX_W, 0xFF, _1}, {r_m64}, DU }, + + DU here - one argument, it's used and defined + +4. That's it, that simple ! + +The operand roles (DU here) are used by Jitrino's optimizing engine to +perform data flow analysis. It also used to store/obtain number of operands. + +Special cases are (see the table for details): +LEA +Some FPU operations (i.e. FSTP) +packed things (XORPD, XORPS, CVTDQ2PD, CVTTPD2DQ) + +Also, the Jitrino's needs require to specify all operands - including +implicit ones (see IMUL). + +The master table iself does not need to be ordered - it's get sorted before +processing. It's recommended (though it's not a law) to group similar +instructions together - i.e. FPU instructions, MMX, etc. 

=============================================================================

The encoding engine builds several tables based on the 'master' one (here
'mnemonic' is a kind of synonym for 'instruction'):

- list of mnemonics which holds general info about instructions
  (EncoderBase::mnemonics)
- an array of opcodes descriptions (EncoderBase::opcodes)
- a mapping between a hash value and an opcode description record for a given
  mnemonic (EncoderBase::opcodesHashMap)

The EncoderBase::mnemonics holds general info about instructions.
The EncoderBase::opcodesHashMap is used for fast opcode selection based on
a hash value.
The EncoderBase::opcodes is used for the encoding itself.

=============================================================================
The hash value is calculated and used as follows:

JIT-ted code uses the following operand sizes: 8-, 16-, 32- and 64-bits and
the size of an operand can be encoded in just 2 bits.

The following operand locations are available: one of registers - GP, FP,
MMX, XMM (not taking segment registers), a memory and an immediate, which
gives us 6 variants and can be enumerated in 3 bits.

As a grand total, the whole operand's info needed for opcode selection
can be packed in 5 bits. Taking into account the IMUL mnemonic with its 3
operands (including implicit ones), we're getting 15 bits per instruction and
the complete table is about 32768 items per single instruction.

Seems too many, but luckily, the 15 bit limit will never be reached: the
worst case is IMUL with its 3 operands:
(IMUL r64, r/m64, imm32)/(IMUL r32, r/m32, imm32).
So, assigning the lowest value to the GP register, the max value of the hash
can be reduced.

The hash values to use are:
sizes:
     8 -> 11
    16 -> 10
    32 -> 01
    64 -> 00
locations:
    gp reg    -> 000
    memory    -> 001
    fp reg    -> 010
    mmx reg   -> 011
    xmm reg   -> 100
    immediate -> 101
and the grand total for the worst case would be
[ GP 32] [GP 32] [Imm 32]
[000-01] [000-01] [101 01] = 1077

However, the implicit operands add additional value, and the worst case
is 'SHLD r_m32, r32, CL=r8'. This gives us the maximum number of:

[mem 32] [GP 32] [GP 8b]
[001-01] [000-01] [000-11] = 5155.

The max number is pretty big and the hash values are quite sparse, thus it
is not reasonable to use direct addressing, i.e.
OpcodeDesc[mnemonic][hash_code] - there would be a huge waste of space.

Instead, we use a kind of mapping: the opcodes info is stored in a packed
(here: non-sparse) array. The max number of opcodes will not exceed 255 for
each instruction. And we have an index array in which we store a mapping
between a hash code value and the opcode position for each given instruction.

Sounds a bit sophisticated, but in reality it is simple; the opcode gets
selected in 2 simple steps:

1. Select [hash,mnemonic] => 'n'.

The array is pretty sparse - many cells contain 0xFF which
means 'invalid hash - no opcode with given characteristics'

char EncoderBase::opcodesHashMap[Mnemonic_Count][HASH_MAX] =

+----+----+----+----+----+----+
| 00 | 05 | FF | FF | 03 | 12 | ...
|---------+-------------------+
| 12 | FF | FF | n  | 04 | 25 | ...  <- Mnemonic
|-----------------------------+
| FF | 11 | FF | 10 | 13 | .. | ...
+-----------------------------+
 ...                      ^
                          |
                         hash

2. Select [n,mnemonic] => 'opcode_desc11'

OpcodeDesc EncoderBase::opcodes[Mnemonic_Count][MAX_OPCODES] =

+---------------+---------------+---------------+---------------+
| opcode_desc00 | opcode_desc01 | opcode_desc02 | last_opcode   | ...
+---------------+---------------+---------------+---------------+
| opcode_desc10 | opcode_desc11 | last_opcode   | xxx           |  <- Mnemonic
+---------------+---------------+---------------+---------------+
| opcode_desc20 | opcode_desc21 | opcode_desc22 | opcode_desc23 | ...
+---------------+---------------+---------------+---------------+
 ...
         ^
         |
         n

Now, use 'opcode_desc11'.

=============================================================================
The array of opcodes descriptions (EncoderBase::opcodes) is specially
prepared to maximize performance - the EncoderBase::encode() is quite hot on
client applications for the Jitrino/Jitrino.JET.
The preparation is that opcode descriptions from the 'master' encoding table
are preprocessed and a special set of OpcodeDesc prepared:
First, the 'raw' opcode bytes are extracted. Here, 'raw' means the bytes that
do not depend on any operands values, do not require any analysis and can be
simply copied into the output buffer during encoding. Also, the number of
these 'raw' bytes is counted. The fields are OpcodeDesc::opcode and
OpcodeDesc::opcode_len.

Then the first non-implicit operand is found and its index is stored in
OpcodeDesc::first_opnd.

The bytes that require processing and analysis ('/r', '+i', etc) are
extracted and stored in OpcodeDesc::aux0 and OpcodeDesc::aux1 fields.

Here, a special trick is performed:
    Some opcodes have a register/memory operand, but this is not reflected in
    the opcode column - for example, (MOVQ xmm64, xmm_m64). In this case, a
    fake '_r' is added to the OpcodeDesc::aux field.
    Some other opcodes have immediate operands, but this is again not
    reflected in the opcode column - for example, CALL cd or PUSH imm32.
    In this case, a fake '/cd' or a fake '/id' is added to the appropriate
    OpcodeDesc::aux field.

The OpcodeDesc::last is non-zero for the final OpcodeDesc record (which does
not have valid data itself).
*/

// TODO: To extend flexibility, replace bool fields in MnemonicDesc &
// MnemonicInfo with a set of flags packed into integer field.

/**
 * @brief Computes the opcode-selection hash for an opcode description.
 *
 * Packs the kind and size of up to three operands of @c odesc into a single
 * unsigned short, HASH_BITS_PER_OPERAND bits per operand; operand 0 ends up
 * in the least-significant bits (see the big comment above for the encoding
 * of sizes and locations).
 *
 * NOTE(review): the asserts range-check against the kind_hash/size_hash
 * tables while the value comes from get_kind_hash()/get_size_hash() -
 * presumably those functions index the same tables; confirm in enc_prvt.h.
 */
unsigned short EncoderBase::getHash(const OpcodeInfo* odesc)
{
    /*
    NOTE: any changes in the hash computation must be strictly balanced with
    EncoderBase::Operand::hash_it and EncoderBase::Operands()
    */
    unsigned short hash = 0;
    // The hash computation uses the fast way - table selection instead of if-s.
    if (odesc->roles.count > 0) {
        OpndKind kind = odesc->opnds[0].kind;
        OpndSize size = odesc->opnds[0].size;
        assert(kind<COUNTOF(kind_hash));
        assert(size<COUNTOF(size_hash));
        hash = get_kind_hash(kind) | get_size_hash(size);
    }

    // Each further operand shifts the accumulated hash left by one operand's
    // worth of bits and ORs its own kind/size hash in.
    if (odesc->roles.count > 1) {
        OpndKind kind = odesc->opnds[1].kind;
        OpndSize size = odesc->opnds[1].size;
        assert(kind<COUNTOF(kind_hash));
        assert(size<COUNTOF(size_hash));
        hash = (hash<<HASH_BITS_PER_OPERAND) |
               (get_kind_hash(kind) | get_size_hash(size));
    }

    if (odesc->roles.count > 2) {
        OpndKind kind = odesc->opnds[2].kind;
        OpndSize size = odesc->opnds[2].size;
        assert(kind<COUNTOF(kind_hash));
        assert(size<COUNTOF(size_hash));
        hash = (hash<<HASH_BITS_PER_OPERAND) |
               (get_kind_hash(kind) | get_size_hash(size));
    }
    assert(hash <= HASH_MAX);
    return hash;
}


// Helper macros that lay out masterEncodingTable as one brace initializer:
// a MnemonicInfo record is opened by BEGIN_MNEMONIC() and closed by
// END_MNEMONIC(); its OpcodeInfo list sits between BEGIN_OPCODES() and
// END_OPCODES(), the latter appending the OpcodeByteKind_LAST sentinel.
#define BEGIN_MNEMONIC(mn, flags, roles) \
        { Mnemonic_##mn, flags, roles, #mn,
#define END_MNEMONIC() },
#define BEGIN_OPCODES() {
#define END_OPCODES()   { OpcodeInfo::all, {OpcodeByteKind_LAST}, {}, {0, 0, 0, 0}}}


static MnemonicInfo masterEncodingTable[] = {
//
// Null
//
BEGIN_MNEMONIC(Null, MF_NONE, N)
BEGIN_OPCODES()
END_OPCODES()
END_MNEMONIC()

BEGIN_MNEMONIC(LAHF, MF_USES_FLAGS, D)
BEGIN_OPCODES()
// TheManual says it's not always supported in em64t mode, thus excluding it
    {OpcodeInfo::ia32,    {0x9F},         {EAX}, D },
END_OPCODES()
END_MNEMONIC()
//
// ALU mnemonics - add, adc, or, xor, and, cmp, sub, sbb
// as they differ only in the opcode extension (/digit) number
and +// in which number the opcode start from, the opcode definitions +// for those instructions are packed together +// +// The 'opcode_starts_from' and 'opcode_ext' in DEFINE_ALU_OPCODES() +// are enough to define OpcodeInfo::all opcodes and the 'first_opcode' +// parameter is only due to ADD instruction, which requires an zero opcode +// byte which, in turn, is coded especially in the current coding scheme. +// + +#define DEFINE_ALU_OPCODES( opc_ext, opcode_starts_from, first_opcode, def_use ) \ +\ + {OpcodeInfo::decoder, {opcode_starts_from + 4, ib}, {AL, imm8}, DU_U },\ + {OpcodeInfo::decoder, {Size16, opcode_starts_from + 5, iw}, {AX, imm16}, DU_U },\ + {OpcodeInfo::decoder, {opcode_starts_from + 5, id}, {EAX, imm32}, DU_U },\ + {OpcodeInfo::decoder64, {REX_W, opcode_starts_from+5, id}, {RAX, imm32s},DU_U },\ +\ + {OpcodeInfo::all, {0x80, opc_ext, ib}, {r_m8, imm8}, def_use },\ + {OpcodeInfo::all, {Size16, 0x81, opc_ext, iw}, {r_m16, imm16}, def_use },\ + {OpcodeInfo::all, {0x81, opc_ext, id}, {r_m32, imm32}, def_use },\ + {OpcodeInfo::em64t, {REX_W, 0x81, opc_ext, id}, {r_m64, imm32s}, def_use },\ +\ + {OpcodeInfo::all, {Size16, 0x83, opc_ext, ib}, {r_m16, imm8s}, def_use },\ + {OpcodeInfo::all, {0x83, opc_ext, ib}, {r_m32, imm8s}, def_use },\ + {OpcodeInfo::em64t, {REX_W, 0x83, opc_ext, ib}, {r_m64, imm8s}, def_use },\ +\ + {OpcodeInfo::all, {first_opcode, _r}, {r_m8, r8}, def_use },\ +\ + {OpcodeInfo::all, {Size16, opcode_starts_from+1, _r}, {r_m16, r16}, def_use },\ + {OpcodeInfo::all, {opcode_starts_from+1, _r}, {r_m32, r32}, def_use },\ + {OpcodeInfo::em64t, {REX_W, opcode_starts_from+1, _r}, {r_m64, r64}, def_use },\ +\ + {OpcodeInfo::all, {opcode_starts_from+2, _r}, {r8, r_m8}, def_use },\ +\ + {OpcodeInfo::all, {Size16, opcode_starts_from+3, _r}, {r16, r_m16}, def_use },\ + {OpcodeInfo::all, {opcode_starts_from+3, _r}, {r32, r_m32}, def_use },\ + {OpcodeInfo::em64t, {REX_W, opcode_starts_from+3, _r}, {r64, r_m64}, def_use }, + +BEGIN_MNEMONIC(ADD, 
MF_AFFECTS_FLAGS|MF_SYMMETRIC, DU_U) +BEGIN_OPCODES() + DEFINE_ALU_OPCODES(_0, 0x00, OxOO, DU_U ) +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(OR, MF_AFFECTS_FLAGS|MF_SYMMETRIC, DU_U) +BEGIN_OPCODES() + DEFINE_ALU_OPCODES(_1, 0x08, 0x08, DU_U ) +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(ADC, MF_AFFECTS_FLAGS|MF_USES_FLAGS|MF_SYMMETRIC, DU_U) +BEGIN_OPCODES() + DEFINE_ALU_OPCODES(_2, 0x10, 0x10, DU_U ) +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(SBB, MF_AFFECTS_FLAGS|MF_USES_FLAGS, DU_U) +BEGIN_OPCODES() + DEFINE_ALU_OPCODES(_3, 0x18, 0x18, DU_U ) +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(AND, MF_AFFECTS_FLAGS|MF_SYMMETRIC, DU_U) +BEGIN_OPCODES() + DEFINE_ALU_OPCODES(_4, 0x20, 0x20, DU_U ) +END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(SUB, MF_AFFECTS_FLAGS|MF_SAME_ARG_NO_USE, DU_U) +BEGIN_OPCODES() + DEFINE_ALU_OPCODES(_5, 0x28, 0x28, DU_U ) +END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(XOR, MF_AFFECTS_FLAGS|MF_SYMMETRIC|MF_SAME_ARG_NO_USE, DU_U) +BEGIN_OPCODES() + DEFINE_ALU_OPCODES( _6, 0x30, 0x30, DU_U ) +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(CMP, MF_AFFECTS_FLAGS, U_U) +BEGIN_OPCODES() + DEFINE_ALU_OPCODES( _7, 0x38, 0x38, U_U ) +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(CMPXCHG, MF_AFFECTS_FLAGS, N) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x0F, 0xB0, _r}, {r_m8, r8, AL}, DU_DU_DU }, + {OpcodeInfo::all, {Size16, 0x0F, 0xB1, _r}, {r_m16, r16, AX}, DU_DU_DU }, + {OpcodeInfo::all, {0x0F, 0xB1, _r}, {r_m32, r32, EAX}, DU_DU_DU}, + {OpcodeInfo::em64t, {REX_W, 0x0F, 0xB1, _r}, {r_m64, r64, RAX}, DU_DU_DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(CMPXCHG8B, MF_AFFECTS_FLAGS, D) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x0F, 0xC7, _1}, {m64}, DU }, +END_OPCODES() +END_MNEMONIC() + +#undef DEFINE_ALU_OPCODES +// +// +// +BEGIN_MNEMONIC(ADDSD, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF2, 0x0F, 0x58, _r}, {xmm64, xmm_m64}, DU_U}, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(ADDSS, MF_NONE, DU_U) 
+BEGIN_OPCODES() + {OpcodeInfo::all, {0xF3, 0x0F, 0x58, _r}, {xmm32, xmm_m32}, DU_U}, +END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(BSF, MF_AFFECTS_FLAGS, N) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x0F, 0xBC}, {r32, r_m32}, D_U}, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(BSR, MF_AFFECTS_FLAGS, N) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x0F, 0xBD}, {r32, r_m32}, D_U}, +END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(CALL, MF_NONE, U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xE8, cd}, {rel32}, U }, + {OpcodeInfo::ia32, {Size16, 0xE8, cw}, {rel16}, U }, + {OpcodeInfo::ia32, {0xFF, _2}, {r_m32}, U }, + {OpcodeInfo::em64t, {0xFF, _2}, {r_m64}, U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(CMC, MF_USES_FLAGS|MF_AFFECTS_FLAGS, N) +BEGIN_OPCODES() + {OpcodeInfo::decoder, {0xF5}, {}, N }, +END_OPCODES() +END_MNEMONIC() + +//TODO: Workaround. Actually, it's D_DU, but Jitrino's CG thinks it's D_U +BEGIN_MNEMONIC(CDQ, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x99}, {DX, AX}, D_U }, + {OpcodeInfo::all, {0x99}, {EDX, EAX}, D_U }, + {OpcodeInfo::em64t, {REX_W, 0x99}, {RDX, RAX}, D_U }, +END_OPCODES() +END_MNEMONIC() + +#define DEFINE_CMOVcc_MNEMONIC( cc ) \ + BEGIN_MNEMONIC(CMOV##cc, MF_USES_FLAGS|MF_CONDITIONAL, DU_U ) \ +BEGIN_OPCODES() \ + {OpcodeInfo::all, {Size16, 0x0F, 0x40 + ConditionMnemonic_##cc, _r}, {r16, r_m16}, DU_U }, \ + {OpcodeInfo::all, {0x0F, 0x40 + ConditionMnemonic_##cc, _r}, {r32, r_m32}, DU_U }, \ + {OpcodeInfo::em64t, {REX_W, 0x0F, 0x40 + ConditionMnemonic_##cc, _r}, {r64, r_m64}, DU_U }, \ +END_OPCODES() \ +END_MNEMONIC() + +DEFINE_CMOVcc_MNEMONIC(O) +DEFINE_CMOVcc_MNEMONIC(NO) +DEFINE_CMOVcc_MNEMONIC(B) +DEFINE_CMOVcc_MNEMONIC(NB) +DEFINE_CMOVcc_MNEMONIC(Z) +DEFINE_CMOVcc_MNEMONIC(NZ) +DEFINE_CMOVcc_MNEMONIC(BE) +DEFINE_CMOVcc_MNEMONIC(NBE) +DEFINE_CMOVcc_MNEMONIC(S) +DEFINE_CMOVcc_MNEMONIC(NS) +DEFINE_CMOVcc_MNEMONIC(P) +DEFINE_CMOVcc_MNEMONIC(NP) +DEFINE_CMOVcc_MNEMONIC(L) +DEFINE_CMOVcc_MNEMONIC(NL) 
+DEFINE_CMOVcc_MNEMONIC(LE) +DEFINE_CMOVcc_MNEMONIC(NLE) + +#undef DEFINE_CMOVcc_MNEMONIC + +/***************************************************************************** + ***** SSE conversion routines ***** +*****************************************************************************/ +// +// double -> float +BEGIN_MNEMONIC(CVTSD2SS, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF2, 0x0F, 0x5A, _r}, {xmm32, xmm_m64}, D_U }, +END_OPCODES() +END_MNEMONIC() + +// double -> I_32 +BEGIN_MNEMONIC(CVTSD2SI, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF2, 0x0F, 0x2D, _r}, {r32, xmm_m64}, D_U }, + {OpcodeInfo::em64t, {REX_W, 0xF2, 0x0F, 0x2D, _r}, {r64, xmm_m64}, D_U }, +END_OPCODES() +END_MNEMONIC() + +// double [truncated] -> I_32 +BEGIN_MNEMONIC(CVTTSD2SI, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF2, 0x0F, 0x2C, _r}, {r32, xmm_m64}, D_U }, + {OpcodeInfo::em64t, {REX_W, 0xF2, 0x0F, 0x2C, _r}, {r64, xmm_m64}, D_U }, +END_OPCODES() +END_MNEMONIC() + +// float -> double +BEGIN_MNEMONIC(CVTSS2SD, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF3, 0x0F, 0x5A, _r}, {xmm64, xmm_m32}, D_U }, +END_OPCODES() +END_MNEMONIC() + +// float -> I_32 +BEGIN_MNEMONIC(CVTSS2SI, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF3, 0x0F, 0x2D, _r}, {r32, xmm_m32}, D_U}, + {OpcodeInfo::em64t, {REX_W, 0xF3, 0x0F, 0x2D, _r}, {r64, xmm_m32}, D_U}, +END_OPCODES() +END_MNEMONIC() + +// float [truncated] -> I_32 +BEGIN_MNEMONIC(CVTTSS2SI, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF3, 0x0F, 0x2C, _r}, {r32, xmm_m32}, D_U}, + {OpcodeInfo::em64t, {REX_W, 0xF3, 0x0F, 0x2C, _r}, {r64, xmm_m32}, D_U}, +END_OPCODES() +END_MNEMONIC() + +// I_32 -> double +BEGIN_MNEMONIC(CVTSI2SD, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF2, 0x0F, 0x2A, _r}, {xmm64, r_m32}, D_U}, + {OpcodeInfo::em64t, {REX_W, 0xF2, 0x0F, 0x2A, _r}, {xmm64, r_m64}, D_U}, +END_OPCODES() +END_MNEMONIC() + +// I_32 -> float +BEGIN_MNEMONIC(CVTSI2SS, MF_NONE, D_U ) 
+BEGIN_OPCODES() + {OpcodeInfo::all, {0xF3, 0x0F, 0x2A, _r}, {xmm32, r_m32}, D_U}, + {OpcodeInfo::em64t, {REX_W, 0xF3, 0x0F, 0x2A, _r}, {xmm32, r_m64}, D_U}, +END_OPCODES() +END_MNEMONIC() + +// +// ~ SSE conversions +// + +BEGIN_MNEMONIC(DEC, MF_AFFECTS_FLAGS, DU ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xFE, _1}, {r_m8}, DU }, + + {OpcodeInfo::all, {Size16, 0xFF, _1}, {r_m16}, DU }, + {OpcodeInfo::all, {0xFF, _1}, {r_m32}, DU }, + {OpcodeInfo::em64t, {REX_W, 0xFF, _1}, {r_m64}, DU }, + + {OpcodeInfo::ia32, {Size16, 0x48|rw}, {r16}, DU }, + {OpcodeInfo::ia32, {0x48|rd}, {r32}, DU }, +END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(DIVSD, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF2, 0x0F, 0x5E, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(DIVSS, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF3, 0x0F, 0x5E, _r}, {xmm32, xmm_m32}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +/**************************************************************************** + ***** FPU operations ***** +****************************************************************************/ + +BEGIN_MNEMONIC(FADDP, MF_NONE, DU ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xDE, 0xC1}, {FP0D}, DU }, + {OpcodeInfo::all, {0xDE, 0xC1}, {FP0S}, DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FLDZ, MF_NONE, U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xEE}, {FP0D}, D }, + {OpcodeInfo::all, {0xD9, 0xEE}, {FP0S}, D }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FADD, MF_NONE, U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xDC, _0}, {FP0D, m64}, DU_U }, + {OpcodeInfo::all, {0xD8, _0}, {FP0S, m32}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FSUBP, MF_NONE, DU ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xDE, 0xE9}, {FP0D}, DU }, + {OpcodeInfo::all, {0xDE, 0xE9}, {FP0S}, DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FSUB, MF_NONE, U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xDC, _4}, {FP0D, m64}, DU_U }, + {OpcodeInfo::all, {0xD8, 
_4}, {FP0S, m32}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FISUB, MF_NONE, U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xDA, _4}, {FP0S, m32}, DU_U }, +// {OpcodeInfo::all, {0xDE, _4}, {FP0S, m16}, DU_U }, +END_OPCODES() +END_MNEMONIC() + + + +BEGIN_MNEMONIC(FMUL, MF_NONE, DU_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xD8, _1}, {FP0S, m32}, DU_U }, + {OpcodeInfo::all, {0xDC, _1}, {FP0D, m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FMULP, MF_NONE, DU ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xDE, 0xC9}, {FP0D}, DU }, + {OpcodeInfo::all, {0xDE, 0xC9}, {FP0S}, DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FDIVP, MF_NONE, DU ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xDE, 0xF9}, {FP0D}, DU }, + {OpcodeInfo::all, {0xDE, 0xF9}, {FP0S}, DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FDIV, MF_NONE, U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xDC, _6}, {FP0D, m64}, DU_U }, + {OpcodeInfo::all, {0xD8, _6}, {FP0S, m32}, DU_U }, +END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(FUCOM, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xDD, 0xE1}, {FP0D, FP1D}, DU_U }, + {OpcodeInfo::all, {0xDD, 0xE1}, {FP0S, FP1S}, DU_U }, + // A little trick: actually, these 2 opcodes take only index of the + // needed register. To make the things similar to other instructions + // we encode here as if they took FPREG. + {OpcodeInfo::all, {0xDD, 0xE0|_i}, {fp32}, DU }, + {OpcodeInfo::all, {0xDD, 0xE0|_i}, {fp64}, DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FUCOMI, MF_NONE, D_U ) +BEGIN_OPCODES() + // A little trick: actually, these 2 opcodes take only index of the + // needed register. To make the things similar to other instructions + // we encode here as if they took FPREG. 
+ {OpcodeInfo::all, {0xDB, 0xE8|_i}, {fp32}, DU }, + {OpcodeInfo::all, {0xDB, 0xE8|_i}, {fp64}, DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FUCOMP, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xDD, 0xE9}, {FP0D, FP1D}, DU_U }, + {OpcodeInfo::all, {0xDD, 0xE9}, {FP0S, FP1S}, DU_U }, + // A little trick: actually, these 2 opcodes take only index of the + // needed register. To make the things similar to other instructions + // we encode here as if they took FPREG. + {OpcodeInfo::all, {0xDD, 0xE8|_i}, {fp32}, DU }, + {OpcodeInfo::all, {0xDD, 0xE8|_i}, {fp64}, DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FUCOMIP, MF_NONE, D_U ) +BEGIN_OPCODES() + // A little trick: actually, these 2 opcodes take only index of the + // needed register. To make the things similar to other instructions + // we encode here as if they took FPREG. + {OpcodeInfo::all, {0xDF, 0xE8|_i}, {fp32}, DU }, + {OpcodeInfo::all, {0xDF, 0xE8|_i}, {fp64}, DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FUCOMPP, MF_NONE, U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xDA, 0xE9}, {FP0D, FP1D}, DU_U }, + {OpcodeInfo::all, {0xDA, 0xE9}, {FP0S, FP1S}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FLDCW, MF_NONE, U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, _5}, {m16}, U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FNSTCW, MF_NONE, D) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, _7}, {m16}, D }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FSTSW, MF_NONE, D) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x9B, 0xDF, 0xE0}, {EAX}, D }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FNSTSW, MF_NONE, D) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xDF, 0xE0}, {EAX}, D }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FCHS, MF_NONE, DU ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xE0}, {FP0D}, DU }, + {OpcodeInfo::all, {0xD9, 0xE0}, {FP0S}, DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FCLEX, MF_NONE, N) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x9B, 0xDB, 
0xE2}, {}, N }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FNCLEX, MF_NONE, N) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xDB, 0xE2}, {}, N }, +END_OPCODES() +END_MNEMONIC() + +//BEGIN_MNEMONIC(FDECSTP, MF_NONE, N) +// BEGIN_OPCODES() +// {OpcodeInfo::all, {0xD9, 0xF6}, {}, N }, +// END_OPCODES() +//END_MNEMONIC() + +BEGIN_MNEMONIC(FILD, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xDB, _0}, {FP0S, m32}, D_U }, + {OpcodeInfo::all, {0xDF, _5}, {FP0D, m64}, D_U }, + {OpcodeInfo::all, {0xDB, _0}, {FP0S, m32}, D_U }, +END_OPCODES() +END_MNEMONIC() + +//BEGIN_MNEMONIC(FINCSTP, MF_NONE, N) +// BEGIN_OPCODES() +// {OpcodeInfo::all, {0xD9, 0xF7}, {}, N }, +// END_OPCODES() +//END_MNEMONIC() + +BEGIN_MNEMONIC(FIST, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xDB, _2}, {m32, FP0S}, D_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FISTP, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xDB, _3}, {m32, FP0S}, D_U }, + {OpcodeInfo::all, {0xDF, _7}, {m64, FP0D}, D_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FISTTP, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xDD, _1}, {m64, FP0D}, D_U }, + {OpcodeInfo::all, {0xDB, _1}, {m32, FP0S}, D_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FRNDINT, MF_NONE, DU ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xFC}, {FP0S}, DU }, + {OpcodeInfo::all, {0xD9, 0xFC}, {FP0D}, DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FLD, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, _0}, {FP0S, m32}, D_U }, + {OpcodeInfo::all, {0xDD, _0}, {FP0D, m64}, D_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FLDLG2, MF_NONE, U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xEC}, {FP0S}, D }, + {OpcodeInfo::all, {0xD9, 0xEC}, {FP0D}, D }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FLDLN2, MF_NONE, U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xED}, {FP0S}, D }, + {OpcodeInfo::all, {0xD9, 0xED}, {FP0D}, D }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FLD1, MF_NONE, U 
) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xE8}, {FP0S}, D }, + {OpcodeInfo::all, {0xD9, 0xE8}, {FP0D}, D }, +END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(FPREM, MF_NONE, N) + BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xF8}, {}, N }, + END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FPREM1, MF_NONE, N) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xF5}, {}, N }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FST, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, _2}, {m32, FP0S}, D_U }, + {OpcodeInfo::all, {0xDD, _2}, {m64, FP0D}, D_U }, + // A little trick: actually, these 2 opcodes take only index of the + // needed register. To make the things similar to other instructions + // we encode here as if they took FPREG. + {OpcodeInfo::all, {0xDD, 0xD0|_i}, {fp32}, D }, + {OpcodeInfo::all, {0xDD, 0xD0|_i}, {fp64}, D }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FSTP, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, _3}, {m32, FP0S}, D_U }, + {OpcodeInfo::all, {0xDD, _3}, {m64, FP0D}, D_U }, + // A little trick: actually, these 2 opcodes take only index of the + // needed register. To make the things similar to other instructions + // we encode here as if they took FPREG. 
+ {OpcodeInfo::all, {0xDD, 0xD8|_i}, {fp32}, D }, + {OpcodeInfo::all, {0xDD, 0xD8|_i}, {fp64}, D }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FSQRT, MF_NONE, DU) + BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xFA}, {FP0S}, DU }, + {OpcodeInfo::all, {0xD9, 0xFA}, {FP0D}, DU }, + END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(FYL2X, MF_NONE, DU) + BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xF1}, {FP0S}, DU }, + {OpcodeInfo::all, {0xD9, 0xF1}, {FP0D}, DU }, + END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(FYL2XP1, MF_NONE, DU) + BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xF9}, {FP0S}, DU }, + {OpcodeInfo::all, {0xD9, 0xF9}, {FP0D}, DU }, + END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(F2XM1, MF_NONE, DU) + BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xF0}, {FP0S}, DU }, + {OpcodeInfo::all, {0xD9, 0xF0}, {FP0D}, DU }, + END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FPATAN, MF_NONE, DU) + BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xF3}, {FP0S}, DU }, + {OpcodeInfo::all, {0xD9, 0xF3}, {FP0D}, DU }, + END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FXCH, MF_NONE, DU) + BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xC9}, {FP0S}, DU }, + {OpcodeInfo::all, {0xD9, 0xC9}, {FP0D}, DU }, + END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FSCALE, MF_NONE, DU) + BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xFD}, {FP0S}, DU }, + {OpcodeInfo::all, {0xD9, 0xFD}, {FP0D}, DU }, + END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FABS, MF_NONE, DU) + BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xE1}, {FP0S}, DU }, + {OpcodeInfo::all, {0xD9, 0xE1}, {FP0D}, DU }, + END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FSIN, MF_NONE, DU) + BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xFE}, {FP0S}, DU }, + {OpcodeInfo::all, {0xD9, 0xFE}, {FP0D}, DU }, + END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FCOS, MF_NONE, DU) + BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xFF}, {FP0S}, DU }, + {OpcodeInfo::all, {0xD9, 0xFF}, {FP0D}, DU }, + END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(FPTAN, 
MF_NONE, DU) + BEGIN_OPCODES() + {OpcodeInfo::all, {0xD9, 0xF2}, {FP0S}, DU }, + {OpcodeInfo::all, {0xD9, 0xF2}, {FP0D}, DU }, + END_OPCODES() +END_MNEMONIC() + +// +// ~ FPU +// + +BEGIN_MNEMONIC(DIV, MF_AFFECTS_FLAGS, DU_DU_U) +BEGIN_OPCODES() +#if !defined(_EM64T_) + {OpcodeInfo::all, {0xF6, _6}, {AH, AL, r_m8}, DU_DU_U }, + {OpcodeInfo::all, {Size16, 0xF7, _6}, {DX, AX, r_m16}, DU_DU_U }, +#endif + {OpcodeInfo::all, {0xF7, _6}, {EDX, EAX, r_m32}, DU_DU_U }, + {OpcodeInfo::em64t, {REX_W, 0xF7, _6}, {RDX, RAX, r_m64}, DU_DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(IDIV, MF_AFFECTS_FLAGS, DU_DU_U) +BEGIN_OPCODES() +#if !defined(_EM64T_) + {OpcodeInfo::all, {0xF6, _7}, {AH, AL, r_m8}, DU_DU_U }, + {OpcodeInfo::all, {Size16, 0xF7, _7}, {DX, AX, r_m16}, DU_DU_U }, +#endif + {OpcodeInfo::all, {0xF7, _7}, {EDX, EAX, r_m32}, DU_DU_U }, + {OpcodeInfo::em64t, {REX_W, 0xF7, _7}, {RDX, RAX, r_m64}, DU_DU_U }, +END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(IMUL, MF_AFFECTS_FLAGS, D_DU_U) +BEGIN_OPCODES() + /*{OpcodeInfo::all, {0xF6, _5}, {AH, AL, r_m8}, D_DU_U }, + {OpcodeInfo::all, {Size16, 0xF7, _5}, {DX, AX, r_m16}, D_DU_U }, + */ + // + {OpcodeInfo::all, {0xF7, _5}, {EDX, EAX, r_m32}, D_DU_U }, + //todo: this opcode's hash conflicts with IMUL r64,r_m64 - they're both 0. 
+ // this particular is not currently used, so we may safely drop it, but need to + // revisit the hash implementation + // {OpcodeInfo::em64t, {REX_W, 0xF7, _5}, {RDX, RAX, r_m64}, D_DU_U }, + // + {OpcodeInfo::all, {Size16, 0x0F, 0xAF, _r}, {r16,r_m16}, DU_U }, + {OpcodeInfo::all, {0x0F, 0xAF, _r}, {r32,r_m32}, DU_U }, + {OpcodeInfo::em64t, {REX_W, 0x0F, 0xAF, _r}, {r64,r_m64}, DU_U }, + {OpcodeInfo::all, {Size16, 0x6B, _r, ib}, {r16,r_m16,imm8s}, D_DU_U }, + {OpcodeInfo::all, {0x6B, _r, ib}, {r32,r_m32,imm8s}, D_DU_U }, + {OpcodeInfo::em64t, {REX_W, 0x6B, _r, ib}, {r64,r_m64,imm8s}, D_DU_U }, + {OpcodeInfo::all, {Size16, 0x6B, _r, ib}, {r16,imm8s}, DU_U }, + {OpcodeInfo::all, {0x6B, _r, ib}, {r32,imm8s}, DU_U }, + {OpcodeInfo::em64t, {REX_W, 0x6B, _r, ib}, {r64,imm8s}, DU_U }, + {OpcodeInfo::all, {Size16, 0x69, _r, iw}, {r16,r_m16,imm16}, D_U_U }, + {OpcodeInfo::all, {0x69, _r, id}, {r32,r_m32,imm32}, D_U_U }, + {OpcodeInfo::em64t, {REX_W, 0x69, _r, id}, {r64,r_m64,imm32s}, D_U_U }, + {OpcodeInfo::all, {Size16, 0x69, _r, iw}, {r16,imm16}, DU_U }, + {OpcodeInfo::all, {0x69, _r, id}, {r32,imm32}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(MUL, MF_AFFECTS_FLAGS, U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF6, _4}, {AX, AL, r_m8}, D_DU_U }, + {OpcodeInfo::all, {Size16, 0xF7, _4}, {DX, AX, r_m16}, D_DU_U }, + {OpcodeInfo::all, {0xF7, _4}, {EDX, EAX, r_m32}, D_DU_U }, + {OpcodeInfo::em64t, {REX_W, 0xF7, _4}, {RDX, RAX, r_m64}, D_DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(INC, MF_AFFECTS_FLAGS, DU ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xFE, _0}, {r_m8}, DU }, + {OpcodeInfo::all, {Size16, 0xFF, _0}, {r_m16}, DU }, + {OpcodeInfo::all, {0xFF, _0}, {r_m32}, DU }, + {OpcodeInfo::em64t, {REX_W, 0xFF, _0}, {r_m64}, DU }, + {OpcodeInfo::ia32, {Size16, 0x40|rw}, {r16}, DU }, + {OpcodeInfo::ia32, {0x40|rd}, {r32}, DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(INT3, MF_NONE, N) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xCC}, {}, N }, 
+END_OPCODES() +END_MNEMONIC() + +#define DEFINE_Jcc_MNEMONIC( cc ) \ + BEGIN_MNEMONIC(J##cc, MF_USES_FLAGS|MF_CONDITIONAL, U ) \ +BEGIN_OPCODES() \ + {OpcodeInfo::all, {0x70 + ConditionMnemonic_##cc, cb }, { rel8 }, U }, \ + {OpcodeInfo::ia32, {Size16, 0x0F, 0x80 + ConditionMnemonic_##cc, cw}, { rel16 }, U }, \ + {OpcodeInfo::all, {0x0F, 0x80 + ConditionMnemonic_##cc, cd}, { rel32 }, U }, \ +END_OPCODES() \ +END_MNEMONIC() + + +DEFINE_Jcc_MNEMONIC(O) +DEFINE_Jcc_MNEMONIC(NO) +DEFINE_Jcc_MNEMONIC(B) +DEFINE_Jcc_MNEMONIC(NB) +DEFINE_Jcc_MNEMONIC(Z) +DEFINE_Jcc_MNEMONIC(NZ) +DEFINE_Jcc_MNEMONIC(BE) +DEFINE_Jcc_MNEMONIC(NBE) + +DEFINE_Jcc_MNEMONIC(S) +DEFINE_Jcc_MNEMONIC(NS) +DEFINE_Jcc_MNEMONIC(P) +DEFINE_Jcc_MNEMONIC(NP) +DEFINE_Jcc_MNEMONIC(L) +DEFINE_Jcc_MNEMONIC(NL) +DEFINE_Jcc_MNEMONIC(LE) +DEFINE_Jcc_MNEMONIC(NLE) + +#undef DEFINE_Jcc_MNEMONIC + +BEGIN_MNEMONIC(JMP, MF_NONE, U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xEB, cb}, {rel8}, U }, + {OpcodeInfo::ia32, {Size16, 0xE9, cw}, {rel16}, U }, + {OpcodeInfo::all, {0xE9, cd}, {rel32}, U }, + {OpcodeInfo::ia32, {Size16, 0xFF, _4}, {r_m16}, U }, + {OpcodeInfo::ia32, {0xFF, _4}, {r_m32}, U }, + {OpcodeInfo::em64t, {0xFF, _4}, {r_m64}, U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(LEA, MF_NONE, D_U ) +BEGIN_OPCODES() + /* + A special case: the LEA instruction itself does not care about size of + second operand. This is obviuos why it is, and thus in The Manual, a + simple 'm' without size is used. + However, in the Jitrino's instrucitons we'll have an operand with a size. + Also, the hashing scheme is not supposed to handle OpndSize_Null, and + making it to do so will lead to unnecessary complication of hashing + scheme. Thus, instead of handling it as a special case, we simply make + copies of the opcodes with sizes set. 
+ {OpcodeInfo::all, {0x8D, _r}, {r32, m}, D_U }, + {OpcodeInfo::em64t, {0x8D, _r}, {r64, m}, D_U }, + */ + //Android x86: keep r32, m32 only, otherwise, will have decoding error + //{OpcodeInfo::all, {0x8D, _r}, {r32, m8}, D_U }, + {OpcodeInfo::em64t, {REX_W, 0x8D, _r}, {r64, m8}, D_U }, + //{OpcodeInfo::all, {0x8D, _r}, {r32, m16}, D_U }, + {OpcodeInfo::em64t, {REX_W, 0x8D, _r}, {r64, m16}, D_U }, + {OpcodeInfo::all, {0x8D, _r}, {r32, m32}, D_U }, + {OpcodeInfo::em64t, {REX_W, 0x8D, _r}, {r64, m32}, D_U }, + {OpcodeInfo::all, {0x8D, _r}, {r32, m64}, D_U }, + {OpcodeInfo::em64t, {REX_W, 0x8D, _r}, {r64, m64}, D_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(LOOP, MF_AFFECTS_FLAGS|MF_USES_FLAGS, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xE2, cb}, {ECX, rel8}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(LOOPE, MF_AFFECTS_FLAGS|MF_USES_FLAGS, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xE1, cb}, {ECX, rel8}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(LOOPNE, MF_AFFECTS_FLAGS|MF_USES_FLAGS, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xE0, cb}, {ECX, rel8}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(MOV, MF_NONE, D_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x88, _r}, {r_m8,r8}, D_U }, + + {OpcodeInfo::all, {Size16, 0x89, _r}, {r_m16,r16}, D_U }, + {OpcodeInfo::all, {0x89, _r}, {r_m32,r32}, D_U }, + {OpcodeInfo::em64t, {REX_W, 0x89, _r}, {r_m64,r64}, D_U }, + {OpcodeInfo::all, {0x8A, _r}, {r8,r_m8}, D_U }, + + {OpcodeInfo::all, {Size16, 0x8B, _r}, {r16,r_m16}, D_U }, + {OpcodeInfo::all, {0x8B, _r}, {r32,r_m32}, D_U }, + {OpcodeInfo::em64t, {REX_W, 0x8B, _r}, {r64,r_m64}, D_U }, + + {OpcodeInfo::all, {0xB0|rb}, {r8,imm8}, D_U }, + + {OpcodeInfo::all, {Size16, 0xB8|rw}, {r16,imm16}, D_U }, + {OpcodeInfo::all, {0xB8|rd}, {r32,imm32}, D_U }, + {OpcodeInfo::em64t, {REX_W, 0xB8|rd}, {r64,imm64}, D_U }, + {OpcodeInfo::all, {0xC6, _0}, {r_m8,imm8}, D_U }, + + {OpcodeInfo::all, {Size16, 0xC7, _0}, {r_m16,imm16}, D_U }, + 
{OpcodeInfo::all, {0xC7, _0}, {r_m32,imm32}, D_U }, + {OpcodeInfo::em64t, {REX_W, 0xC7, _0}, {r_m64,imm32s}, D_U }, + + {OpcodeInfo::decoder, {0xA0}, {AL, moff8}, D_U }, + {OpcodeInfo::decoder, {Size16, 0xA1}, {AX, moff16}, D_U }, + {OpcodeInfo::decoder, {0xA1}, {EAX, moff32}, D_U }, + //{OpcodeInfo::decoder64, {REX_W, 0xA1}, {RAX, moff64}, D_U }, + + {OpcodeInfo::decoder, {0xA2}, {moff8, AL}, D_U }, + {OpcodeInfo::decoder, {Size16, 0xA3}, {moff16, AX}, D_U }, + {OpcodeInfo::decoder, {0xA3}, {moff32, EAX}, D_U }, + //{OpcodeInfo::decoder64, {REX_W, 0xA3}, {moff64, RAX}, D_U }, +END_OPCODES() +END_MNEMONIC() + + + +BEGIN_MNEMONIC(XCHG, MF_NONE, DU_DU ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x87, _r}, {r_m32,r32}, DU_DU }, +END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(MOVQ, MF_NONE, D_U ) +BEGIN_OPCODES() +#ifdef _HAVE_MMX_ + {OpcodeInfo::all, {0x0F, 0x6F, _r}, {mm64, mm_m64}, D_U }, + {OpcodeInfo::all, {0x0F, 0x7F, _r}, {mm_m64, mm64}, D_U }, +#endif + {OpcodeInfo::all, {0xF3, 0x0F, 0x7E }, {xmm64, xmm_m64}, D_U }, + {OpcodeInfo::all, {0x66, 0x0F, 0xD6 }, {xmm_m64, xmm64}, D_U }, +// {OpcodeInfo::em64t, {REX_W, 0x66, 0x0F, 0x6E, _r}, {xmm64, r_m64}, D_U }, +// {OpcodeInfo::em64t, {REX_W, 0x66, 0x0F, 0x7E, _r}, {r_m64, xmm64}, D_U }, + {OpcodeInfo::em64t, {REX_W, 0x66, 0x0F, 0x6E, _r}, {xmm64, r64}, D_U }, + {OpcodeInfo::em64t, {REX_W, 0x66, 0x0F, 0x7E, _r}, {r64, xmm64}, D_U }, +END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(MOVD, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0x6E, _r}, {xmm32, r_m32}, D_U }, + {OpcodeInfo::all, {0x66, 0x0F, 0x7E, _r}, {r_m32, xmm32}, D_U }, +END_OPCODES() +END_MNEMONIC() + +// +// A bunch of MMX instructions +// +#ifdef _HAVE_MMX_ + +BEGIN_MNEMONIC(EMMS, MF_NONE, N) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x0F, 0x77}, {}, N }, +END_OPCODES() +END_MNEMONIC() + +#endif + +BEGIN_MNEMONIC(PADDQ, MF_NONE, DU_U) +BEGIN_OPCODES() +#ifdef _HAVE_MMX_ + {OpcodeInfo::all, {0x0F, 0xD4, _r}, {mm64, mm_m64}, DU_U }, 
+#endif + {OpcodeInfo::all, {0x66, 0x0F, 0xD4, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PAND, MF_NONE, DU_U) +BEGIN_OPCODES() +#ifdef _HAVE_MMX_ + {OpcodeInfo::all, {0x0F, 0xDB, _r}, {mm64, mm_m64}, DU_U }, +#endif + {OpcodeInfo::all, {0x66, 0x0F, 0xDB, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(POR, MF_NONE, DU_U) +BEGIN_OPCODES() +#ifdef _HAVE_MMX_ + {OpcodeInfo::all, {0x0F, 0xEB, _r}, {mm64, mm_m64}, DU_U }, +#endif + {OpcodeInfo::all, {0x66, 0x0F, 0xEB, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PSUBQ, MF_NONE, DU_U) +BEGIN_OPCODES() +#ifdef _HAVE_MMX_ + {OpcodeInfo::all, {0x0F, 0xFB, _r}, {mm64, mm_m64}, DU_U }, +#endif + {OpcodeInfo::all, {0x66, 0x0F, 0xFB, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PANDN, MF_NONE, DU_U) +BEGIN_OPCODES() +#ifdef _HAVE_MMX_ + {OpcodeInfo::all, {0x0F, 0xDF, _r}, {mm64, mm_m64}, DU_U }, +#endif + {OpcodeInfo::all, {0x66, 0x0F, 0xDF, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() +BEGIN_MNEMONIC(PSLLQ, MF_NONE, DU_U) +BEGIN_OPCODES() +#ifdef _HAVE_MMX_ + {OpcodeInfo::all, {0x0F, 0xF3, _r}, {mm64, mm_m64}, DU_U }, +#endif + {OpcodeInfo::all, {0x66, 0x0F, 0xF3, _r}, {xmm64, xmm_m64}, DU_U }, + {OpcodeInfo::all, {0x66, 0x0F, 0x73, _6, ib}, {xmm64, imm8}, DU_U }, +END_OPCODES() +END_MNEMONIC() +BEGIN_MNEMONIC(PSRLQ, MF_NONE, DU_U) +BEGIN_OPCODES() +#ifdef _HAVE_MMX_ + {OpcodeInfo::all, {0x0F, 0xD3, _r}, {mm64, mm_m64}, DU_U }, +#endif + {OpcodeInfo::all, {0x66, 0x0F, 0xD3, _r}, {xmm64, xmm_m64}, DU_U }, + {OpcodeInfo::all, {0x66, 0x0F, 0x73, _2, ib}, {xmm64, imm8}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PXOR, MF_NONE, DU_U) +BEGIN_OPCODES() +#ifdef _HAVE_MMX_ + {OpcodeInfo::all, {0x0F, 0xEF, _r}, {mm64, mm_m64}, DU_U }, +#endif + {OpcodeInfo::all, {0x66, 0x0F, 0xEF, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(MOVAPD, 
MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0x28, _r}, {xmm64, xmm_m64}, D_U }, + {OpcodeInfo::all, {0x66, 0x0F, 0x29, _r}, {xmm_m64, xmm64}, D_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(MOVAPS, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x0F, 0x28, _r}, {xmm64, xmm_m64}, D_U }, + {OpcodeInfo::all, {0x0F, 0x29, _r}, {xmm_m64, xmm64}, D_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(SHUFPS, MF_NONE, D_U_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x0F, 0xC6, _r, ib}, {xmm64, xmm_m64, imm8}, D_U_U }, +END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(MOVSD, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF2, 0x0F, 0x10, _r}, {xmm64, xmm_m64}, D_U }, + {OpcodeInfo::all, {0xF2, 0x0F, 0x11, _r}, {xmm_m64, xmm64}, D_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(MOVSS, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF3, 0x0F, 0x10, _r}, {xmm32, xmm_m32}, D_U }, + {OpcodeInfo::all, {0xF3, 0x0F, 0x11, _r}, {xmm_m32, xmm32}, D_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(MOVSX, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {Size16, 0x0F, 0xBE, _r}, {r16, r_m8s}, D_U }, + {OpcodeInfo::all, {0x0F, 0xBE, _r}, {r32, r_m8s}, D_U }, + {OpcodeInfo::em64t, {REX_W, 0x0F, 0xBE, _r}, {r64, r_m8s}, D_U }, + + {OpcodeInfo::all, {0x0F, 0xBF, _r}, {r32, r_m16s}, D_U }, + {OpcodeInfo::em64t, {REX_W, 0x0F, 0xBF, _r}, {r64, r_m16s}, D_U }, + + {OpcodeInfo::em64t, {REX_W, 0x63, _r}, {r64, r_m32s}, D_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(MOVZX, MF_NONE, D_U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {Size16, 0x0F, 0xB6, _r}, {r16, r_m8u}, D_U }, + {OpcodeInfo::all, {0x0F, 0xB6, _r}, {r32, r_m8u}, D_U }, + {OpcodeInfo::em64t, {REX_W, 0x0F, 0xB6, _r}, {r64, r_m8u}, D_U }, + + {OpcodeInfo::all, {0x0F, 0xB7, _r}, {r32, r_m16u}, D_U }, + {OpcodeInfo::em64t, {REX_W, 0x0F, 0xB7, _r}, {r64, r_m16u}, D_U }, + //workaround to get r/rm32->r64 ZX mov functionality: + //simple 32bit reg copying zeros high bits in 
64bit reg + {OpcodeInfo::em64t, {0x8B, _r}, {r64, r_m32u}, D_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(MULSD, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF2, 0x0F, 0x59, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(MULSS, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF3, 0x0F, 0x59, _r}, {xmm32, xmm_m32}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(NEG, MF_AFFECTS_FLAGS, DU ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF6, _3}, {r_m8}, DU }, + + {OpcodeInfo::all, {Size16, 0xF7, _3}, {r_m16}, DU }, + {OpcodeInfo::all, {0xF7, _3}, {r_m32}, DU }, + {OpcodeInfo::em64t, {REX_W, 0xF7, _3}, {r_m64}, DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(NOP, MF_NONE, N) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x90}, {}, N }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(NOT, MF_AFFECTS_FLAGS, DU ) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF6, _2}, {r_m8}, DU }, + {OpcodeInfo::all, {Size16, 0xF7, _2}, {r_m16}, DU }, + {OpcodeInfo::all, {0xF7, _2}, {r_m32}, DU }, + {OpcodeInfo::em64t, {REX_W, 0xF7, _2}, {r_m64}, DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(POP, MF_NONE, D) +BEGIN_OPCODES() + {OpcodeInfo::all, {Size16, 0x8F, _0}, {r_m16}, D }, + {OpcodeInfo::ia32, {0x8F, _0}, {r_m32}, D }, + {OpcodeInfo::em64t, {0x8F, _0}, {r_m64}, D }, + + {OpcodeInfo::all, {Size16, 0x58|rw }, {r16}, D }, + {OpcodeInfo::ia32, {0x58|rd }, {r32}, D }, + {OpcodeInfo::em64t, {0x58|rd }, {r64}, D }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(POPFD, MF_AFFECTS_FLAGS, N) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x9D}, {}, N }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PREFETCH, MF_NONE, U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x0F, 0x18, _0}, {m8}, U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PUSH, MF_NONE, U ) +BEGIN_OPCODES() + {OpcodeInfo::all, {Size16, 0xFF, _6}, {r_m16}, U }, + {OpcodeInfo::ia32, {0xFF, _6}, {r_m32}, U }, + {OpcodeInfo::em64t, {0xFF, _6}, {r_m64}, U }, + + 
{OpcodeInfo::all, {Size16, 0x50|rw }, {r16}, U }, + {OpcodeInfo::ia32, {0x50|rd }, {r32}, U }, + {OpcodeInfo::em64t, {0x50|rd }, {r64}, U }, + + {OpcodeInfo::all, {0x6A}, {imm8}, U }, + {OpcodeInfo::all, {Size16, 0x68}, {imm16}, U }, + {OpcodeInfo::ia32, {0x68}, {imm32}, U }, +// {OpcodeInfo::em64t, {0x68}, {imm64}, U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PUSHFD, MF_USES_FLAGS, N) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x9C}, {}, N }, +END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(RET, MF_NONE, N) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xC3}, {}, N }, + {OpcodeInfo::all, {0xC2, iw}, {imm16}, U }, +END_OPCODES() +END_MNEMONIC() + +#define DEFINE_SETcc_MNEMONIC( cc ) \ + BEGIN_MNEMONIC(SET##cc, MF_USES_FLAGS|MF_CONDITIONAL, DU) \ +BEGIN_OPCODES() \ + {OpcodeInfo::all, {0x0F, 0x90 + ConditionMnemonic_##cc}, {r_m8}, DU }, \ +END_OPCODES() \ +END_MNEMONIC() + +DEFINE_SETcc_MNEMONIC(O) +DEFINE_SETcc_MNEMONIC(NO) +DEFINE_SETcc_MNEMONIC(B) +DEFINE_SETcc_MNEMONIC(NB) +DEFINE_SETcc_MNEMONIC(Z) +DEFINE_SETcc_MNEMONIC(NZ) +DEFINE_SETcc_MNEMONIC(BE) +DEFINE_SETcc_MNEMONIC(NBE) + +DEFINE_SETcc_MNEMONIC(S) +DEFINE_SETcc_MNEMONIC(NS) +DEFINE_SETcc_MNEMONIC(P) +DEFINE_SETcc_MNEMONIC(NP) +DEFINE_SETcc_MNEMONIC(L) +DEFINE_SETcc_MNEMONIC(NL) +DEFINE_SETcc_MNEMONIC(LE) +DEFINE_SETcc_MNEMONIC(NLE) + +#undef DEFINE_SETcc_MNEMONIC + +#define DEFINE_SHIFT_MNEMONIC(nam, slash_num, flags) \ +BEGIN_MNEMONIC(nam, flags, DU_U) \ +BEGIN_OPCODES()\ + /* D0 & D1 opcodes are added w/o 2nd operand (1) because */\ + /* they are used for decoding only so only instruction length is needed */\ + {OpcodeInfo::decoder, {0xD0, slash_num}, {r_m8/*,const_1*/}, DU },\ + {OpcodeInfo::all, {0xD2, slash_num}, {r_m8, CL}, DU_U },\ + {OpcodeInfo::all, {0xC0, slash_num, ib}, {r_m8, imm8}, DU_U },\ +\ + {OpcodeInfo::decoder, {Size16, 0xD1, slash_num}, {r_m16/*,const_1*/}, DU },\ + {OpcodeInfo::all, {Size16, 0xD3, slash_num}, {r_m16, CL}, DU_U },\ + {OpcodeInfo::all, {Size16, 0xC1, slash_num, ib}, 
{r_m16, imm8 }, DU_U },\ +\ + {OpcodeInfo::decoder, {0xD1, slash_num}, {r_m32/*,const_1*/}, DU },\ + {OpcodeInfo::decoder64, {REX_W, 0xD1, slash_num}, {r_m64/*,const_1*/}, DU },\ +\ + {OpcodeInfo::all, {0xD3, slash_num}, {r_m32, CL}, DU_U },\ + {OpcodeInfo::em64t, {REX_W, 0xD3, slash_num}, {r_m64, CL}, DU_U },\ +\ + {OpcodeInfo::all, {0xC1, slash_num, ib}, {r_m32, imm8}, DU_U },\ + {OpcodeInfo::em64t, {REX_W, 0xC1, slash_num, ib}, {r_m64, imm8}, DU_U },\ +END_OPCODES()\ +END_MNEMONIC() + + +DEFINE_SHIFT_MNEMONIC(ROL, _0, MF_AFFECTS_FLAGS) +DEFINE_SHIFT_MNEMONIC(ROR, _1, MF_AFFECTS_FLAGS) +DEFINE_SHIFT_MNEMONIC(RCL, _2, MF_AFFECTS_FLAGS|MF_USES_FLAGS) +DEFINE_SHIFT_MNEMONIC(RCR, _3, MF_AFFECTS_FLAGS|MF_USES_FLAGS) + +DEFINE_SHIFT_MNEMONIC(SAL, _4, MF_AFFECTS_FLAGS) +DEFINE_SHIFT_MNEMONIC(SHR, _5, MF_AFFECTS_FLAGS) +DEFINE_SHIFT_MNEMONIC(SAR, _7, MF_AFFECTS_FLAGS) + +#undef DEFINE_SHIFT_MNEMONIC + +BEGIN_MNEMONIC(SHLD, MF_AFFECTS_FLAGS, N) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x0F, 0xA5}, {r_m32, r32, CL}, DU_DU_U }, + {OpcodeInfo::all, {0x0F, 0xA4}, {r_m32, r32, imm8}, DU_DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(SHRD, MF_AFFECTS_FLAGS, N) +// TODO: the def/use info is wrong +BEGIN_OPCODES() + {OpcodeInfo::all, {0x0F, 0xAD}, {r_m32, r32, CL}, DU_DU_U }, +END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(SUBSD, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF2, 0x0F, 0x5C, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(SUBSS, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF3, 0x0F, 0x5C, _r}, {xmm32, xmm_m32}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(TEST, MF_AFFECTS_FLAGS, U_U) +BEGIN_OPCODES() + + {OpcodeInfo::decoder, {0xA8, ib}, { AL, imm8}, U_U }, + {OpcodeInfo::decoder, {0xA9, iw}, { AX, imm16}, U_U }, + {OpcodeInfo::decoder, {0xA9, id}, { EAX, imm32}, U_U }, + {OpcodeInfo::decoder64, {REX_W, 0xA9, id}, { RAX, imm32s}, U_U }, + + {OpcodeInfo::all, {0xF6, _0, ib}, {r_m8,imm8}, U_U 
}, + + {OpcodeInfo::all, {Size16, 0xF7, _0, iw}, {r_m16,imm16}, U_U }, + {OpcodeInfo::all, {0xF7, _0, id}, {r_m32,imm32}, U_U }, + {OpcodeInfo::em64t, {REX_W, 0xF7, _0, id}, {r_m64,imm32s}, U_U }, + + {OpcodeInfo::all, {0x84, _r}, {r_m8,r8}, U_U }, + + {OpcodeInfo::all, {Size16, 0x85, _r}, {r_m16,r16}, U_U }, + {OpcodeInfo::all, {0x85, _r}, {r_m32,r32}, U_U }, + {OpcodeInfo::em64t, {REX_W, 0x85, _r}, {r_m64,r64}, U_U }, +END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(UCOMISD, MF_AFFECTS_FLAGS, U_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0x2E, _r}, {xmm64, xmm_m64}, U_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(UCOMISS, MF_AFFECTS_FLAGS, U_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x0F, 0x2E, _r}, {xmm32, xmm_m32}, U_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(COMISD, MF_AFFECTS_FLAGS, U_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0x2F, _r}, {xmm64, xmm_m64}, U_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(COMISS, MF_AFFECTS_FLAGS, U_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x0F, 0x2F, _r}, {xmm32, xmm_m32}, U_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(XORPD, MF_SAME_ARG_NO_USE|MF_SYMMETRIC, DU_U) +BEGIN_OPCODES() + //Note: they're actually 128 bits + {OpcodeInfo::all, {0x66, 0x0F, 0x57, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(XORPS, MF_SAME_ARG_NO_USE|MF_SYMMETRIC, DU_U) +BEGIN_OPCODES() + //Note: they're actually 128 bits + {OpcodeInfo::all, {0x0F, 0x57, _r}, {xmm32, xmm_m32}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(CVTDQ2PD, MF_NONE, D_U ) +BEGIN_OPCODES() + //Note: they're actually 128 bits + {OpcodeInfo::all, {0xF3, 0x0F, 0xE6}, {xmm64, xmm_m64}, D_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(CVTDQ2PS, MF_NONE, D_U ) +BEGIN_OPCODES() + //Note: they're actually 128 bits + {OpcodeInfo::all, {0x0F, 0x5B, _r}, {xmm32, xmm_m32}, D_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(CVTTPD2DQ, MF_NONE, D_U ) +BEGIN_OPCODES() + //Note: 
they're actually 128 bits + {OpcodeInfo::all, {0x66, 0x0F, 0xE6}, {xmm64, xmm_m64}, D_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(CVTTPS2DQ, MF_NONE, D_U ) +BEGIN_OPCODES() + //Note: they're actually 128 bits + {OpcodeInfo::all, {0xF3, 0x0F, 0x5B, _r}, {xmm32, xmm_m32}, D_U }, +END_OPCODES() +END_MNEMONIC() + +// +// String operations +// +BEGIN_MNEMONIC(STD, MF_AFFECTS_FLAGS, N) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xFD}, {}, N }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(CLD, MF_AFFECTS_FLAGS, N) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xFC}, {}, N }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(SCAS, MF_AFFECTS_FLAGS, N) +// to be symmetric, this mnemonic must have either m32 or RegName_EAX +// but as long, as Jitrino's CG does not use the mnemonic, leaving it +// in its natural form +BEGIN_OPCODES() + {OpcodeInfo::all, {0xAF}, {}, N }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(STOS, MF_AFFECTS_FLAGS, DU_DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xAB}, {EDI, ECX, EAX}, DU_DU_U }, + {OpcodeInfo::all, {0xAA}, {EDI, ECX, AL}, DU_DU_U }, + {OpcodeInfo::em64t, {REX_W, 0xAB}, {RDI, RCX, RAX}, DU_DU_U }, +END_OPCODES() +END_MNEMONIC() + +/* +MOVS and CMPS are the special cases. +Most the code in both CG and Encoder do not expect 2 memory operands. +Also, they are not supposed to setup constrains on which register the +memory reference must reside - m8,m8 or m32,m32 is not the choice. +We can't use r8,r8 either - will have problem with 8bit EDI, ESI. +So, as the workaround we do r32,r32 and specify size of the operand through +the specific mnemonic - the same is in the codegen. 
+*/ +BEGIN_MNEMONIC(MOVS8, MF_NONE, DU_DU_DU) +BEGIN_OPCODES() + {OpcodeInfo::ia32, {0xA4}, {r32,r32,ECX}, DU_DU_DU }, + {OpcodeInfo::em64t, {0xA4}, {r64,r64,RCX}, DU_DU_DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(MOVS16, MF_NONE, DU_DU_DU) +BEGIN_OPCODES() + {OpcodeInfo::ia32, {Size16, 0xA5}, {r32,r32,ECX}, DU_DU_DU }, + {OpcodeInfo::em64t, {Size16, 0xA5}, {r64,r64,RCX}, DU_DU_DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(MOVS32, MF_NONE, DU_DU_DU) +BEGIN_OPCODES() + {OpcodeInfo::ia32, {0xA5}, {r32,r32,ECX}, DU_DU_DU }, + {OpcodeInfo::em64t, {0xA5}, {r64,r64,RCX}, DU_DU_DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(MOVS64, MF_NONE, DU_DU_DU) +BEGIN_OPCODES() + {OpcodeInfo::em64t, {REX_W,0xA5}, {r64,r64,RCX}, DU_DU_DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(CMPSB, MF_AFFECTS_FLAGS, DU_DU_DU) +BEGIN_OPCODES() + {OpcodeInfo::ia32, {0xA6}, {ESI,EDI,ECX}, DU_DU_DU }, + {OpcodeInfo::em64t, {0xA6}, {RSI,RDI,RCX}, DU_DU_DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(CMPSW, MF_AFFECTS_FLAGS, DU_DU_DU) +BEGIN_OPCODES() + {OpcodeInfo::ia32, {Size16, 0xA7}, {ESI,EDI,ECX}, DU_DU_DU }, + {OpcodeInfo::em64t, {Size16, 0xA7}, {RSI,RDI,RCX}, DU_DU_DU }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(CMPSD, MF_AFFECTS_FLAGS, DU_DU_DU) +BEGIN_OPCODES() + {OpcodeInfo::ia32, {0xA7}, {ESI,EDI,ECX}, DU_DU_DU }, + {OpcodeInfo::em64t, {0xA7}, {RSI,RDI,RCX}, DU_DU_DU }, +END_OPCODES() +END_MNEMONIC() + + +BEGIN_MNEMONIC(WAIT, MF_AFFECTS_FLAGS, N) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x9B}, {}, N }, +END_OPCODES() +END_MNEMONIC() + +// +// ~String operations +// + +// +//Note: the instructions below added for the sake of disassembling routine. +// They need to have flags, params and params usage to be defined more precisely. 
+// +BEGIN_MNEMONIC(LEAVE, MF_NONE, N) +BEGIN_OPCODES() + {OpcodeInfo::decoder, {0xC9}, {}, N }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(ENTER, MF_NONE, N) +BEGIN_OPCODES() + {OpcodeInfo::decoder, {0xC8, iw, ib}, {imm16, imm8}, N }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PADDB, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0xFC, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PADDW, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0xFD, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PADDD, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0xFE, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PSUBB, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0xF8, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PSUBW, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0xF9, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PSUBD, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0xFA, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PMULLW, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0xD5, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PMULLD, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x40, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PSLLW, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0xF1, _r}, {xmm64, xmm_m64}, DU_U }, + {OpcodeInfo::all, {0x66, 0x0F, 0x71, _6, ib}, {xmm64, imm8}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PSLLD, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0xF2, _r}, {xmm64, xmm_m64}, DU_U }, + {OpcodeInfo::all, {0x66, 0x0F, 0x72, _6, ib}, {xmm64, imm8}, DU_U }, +END_OPCODES() 
+END_MNEMONIC() + +BEGIN_MNEMONIC(PSRAW, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0xE1, _r}, {xmm64, xmm_m64}, DU_U }, + {OpcodeInfo::all, {0x66, 0x0F, 0x71, _4, ib}, {xmm64, imm8}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PSRAD, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0xE2, _r}, {xmm64, xmm_m64}, DU_U }, + {OpcodeInfo::all, {0x66, 0x0F, 0x72, _4, ib}, {xmm64, imm8}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PSRLW, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0xD1, _r}, {xmm64, xmm_m64}, DU_U }, + {OpcodeInfo::all, {0x66, 0x0F, 0x71, _2, ib}, {xmm64, imm8}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PSRLD, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0xD2, _r}, {xmm64, xmm_m64}, DU_U }, + {OpcodeInfo::all, {0x66, 0x0F, 0x72, _2, ib}, {xmm64, imm8}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PMOVSXBW, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x20, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PSHUFB, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x00, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PSHUFD, MF_NONE, D_U_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0x70, _r, ib}, {xmm64, xmm_m64, imm8}, D_U_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PSHUFLW, MF_NONE, D_U_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF2, 0x0F, 0x70, _r, ib}, {xmm64, xmm_m64, imm8}, D_U_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PSHUFHW, MF_NONE, D_U_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0xF3, 0x0F, 0x70, _r, ib}, {xmm64, xmm_m64, imm8}, D_U_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PHADDSW, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x03, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PHADDW, MF_NONE, DU_U) 
+BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x01, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PHADDD, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x02, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PHSUBSW, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x07, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PHSUBW, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x05, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PHSUBD, MF_NONE, DU_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0x38, 0x06, _r}, {xmm64, xmm_m64}, DU_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PEXTRB, MF_NONE, D_U_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0x3A, 0x14, _r, ib}, {r32, xmm64, imm8}, D_U_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PEXTRW, MF_NONE, D_U_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0xC5, _r, ib}, {r32, xmm64, imm8}, D_U_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(PEXTRD, MF_NONE, D_U_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0x3A, 0x16, _r, ib}, {r_m32, xmm64, imm8}, D_U_U }, +END_OPCODES() +END_MNEMONIC() + +BEGIN_MNEMONIC(MOVDQA, MF_NONE|MF_SYMMETRIC, D_U) +BEGIN_OPCODES() + {OpcodeInfo::all, {0x66, 0x0F, 0x6F, _r}, {xmm64, xmm_m64}, D_U }, + //The encoder cannot properly look up when operands are symmetric but opcode is not: + //{OpcodeInfo::all, {0x66, 0x0F, 0x7F, _r}, {xmm_m128, xmm128}, D_U }, +END_OPCODES() +END_MNEMONIC() + +}; // ~masterEncodingTable[] + +ENCODER_NAMESPACE_END + +ENCODER_NAMESPACE_START + +static int compareMnemonicInfo(const void* info1, const void* info2) +{ + Mnemonic id1, id2; + + id1 = ((const MnemonicInfo*) info1)->mn; + id2 = ((const MnemonicInfo*) info2)->mn; + if (id1 < id2) + return -1; + if (id1 > id2) + return 1; + return 0; +} + +int 
EncoderBase::buildTable(void) +{ + // A check: all mnemonics must be covered + assert(COUNTOF(masterEncodingTable) == Mnemonic_Count); + + // sort out the mnemonics so the list become ordered + qsort(masterEncodingTable, Mnemonic_Count, sizeof(MnemonicInfo), compareMnemonicInfo); + + // + // clear the things + // + memset(opcodesHashMap, NOHASH, sizeof(opcodesHashMap)); + memset(opcodes, 0, sizeof(opcodes)); + // + // and, finally, build it + for (unsigned i=0; i<Mnemonic_Count; i++) { + assert((Mnemonic)i == (masterEncodingTable + i)->mn); + buildMnemonicDesc(masterEncodingTable+i); + } + return 0; +} + +void EncoderBase::buildMnemonicDesc(const MnemonicInfo * minfo) +{ + MnemonicDesc& mdesc = mnemonics[minfo->mn]; + mdesc.mn = minfo->mn; + mdesc.flags = minfo->flags; + mdesc.roles = minfo->roles; + mdesc.name = minfo->name; + + // + // fill the used opcodes + // + for (unsigned i=0, oindex=0; i<COUNTOF(minfo->opcodes); i++) { + + const OpcodeInfo& oinfo = minfo->opcodes[i]; + OpcodeDesc& odesc = opcodes[minfo->mn][oindex]; + // last opcode ? 
+ if (oinfo.opcode[0] == OpcodeByteKind_LAST) { + // mark the opcode 'last', exit + odesc.opcode_len = 0; + odesc.last = 1; + break; + } + odesc.last = 0; +#ifdef _EM64T_ + if (oinfo.platf == OpcodeInfo::ia32) { continue; } + if (oinfo.platf == OpcodeInfo::decoder32) { continue; } +#else + if (oinfo.platf == OpcodeInfo::em64t) { continue; } + if (oinfo.platf == OpcodeInfo::decoder64) { continue; } +#endif + if (oinfo.platf == OpcodeInfo::decoder64 || + oinfo.platf == OpcodeInfo::decoder32) { + odesc.platf = OpcodeInfo::decoder; + } + else { + odesc.platf = (char)oinfo.platf; + } + // + // fill out opcodes + // + unsigned j = 0; + odesc.opcode_len = 0; + for(; oinfo.opcode[j]; j++) { + unsigned opcod = oinfo.opcode[j]; + unsigned kind = opcod&OpcodeByteKind_KindMask; + if (kind == OpcodeByteKind_REX_W) { + odesc.opcode[odesc.opcode_len++] = (unsigned char)0x48; + continue; + } + else if(kind != 0 && kind != OpcodeByteKind_ZeroOpcodeByte) { + break; + } + unsigned lowByte = (opcod & OpcodeByteKind_OpcodeMask); + odesc.opcode[odesc.opcode_len++] = (unsigned char)lowByte; + } + assert(odesc.opcode_len<5); + odesc.aux0 = odesc.aux1 = 0; + if (oinfo.opcode[j] != 0) { + odesc.aux0 = oinfo.opcode[j]; + assert((odesc.aux0 & OpcodeByteKind_KindMask) != 0); + ++j; + if(oinfo.opcode[j] != 0) { + odesc.aux1 = oinfo.opcode[j]; + assert((odesc.aux1 & OpcodeByteKind_KindMask) != 0); + } + } + else if (oinfo.roles.count>=2) { + if (((oinfo.opnds[0].kind&OpndKind_Mem) && + (isRegKind(oinfo.opnds[1].kind))) || + ((oinfo.opnds[1].kind&OpndKind_Mem) && + (isRegKind(oinfo.opnds[0].kind)))) { + // Example: MOVQ xmm1, xmm/m64 has only opcodes + // same with SHRD + // Adding fake /r + odesc.aux0 = _r; + } + } + else if (oinfo.roles.count==1) { + if (oinfo.opnds[0].kind&OpndKind_Mem) { + // Example: SETcc r/m8, adding fake /0 + odesc.aux0 = _0; + } + } + // check imm + if (oinfo.roles.count > 0 && + (oinfo.opnds[0].kind == OpndKind_Imm || + oinfo.opnds[oinfo.roles.count-1].kind == 
OpndKind_Imm)) { + // Example: CALL cd, PUSH imm32 - they fit both opnds[0] and + // opnds[oinfo.roles.count-1]. + // The A3 opcode fits only opnds[0] - it's currently have + // MOV imm32, EAX. Looks ridiculous, but this is how the + // moffset is currently implemented. Will need to fix together + // with other usages of moff. + // adding fake /cd or fake /id + unsigned imm_opnd_index = + oinfo.opnds[0].kind == OpndKind_Imm ? 0 : oinfo.roles.count-1; + OpndSize sz = oinfo.opnds[imm_opnd_index].size; + unsigned imm_encode, coff_encode; + if (sz==OpndSize_8) {imm_encode = ib; coff_encode=cb; } + else if (sz==OpndSize_16) {imm_encode = iw; coff_encode=cw;} + else if (sz==OpndSize_32) {imm_encode = id; coff_encode=cd; } + else if (sz==OpndSize_64) {imm_encode = io; coff_encode=0xCC; } + else { assert(false); imm_encode=0xCC; coff_encode=0xCC; } + if (odesc.aux1 == 0) { + if (odesc.aux0==0) { + odesc.aux0 = imm_encode; + } + else { + if (odesc.aux0 != imm_encode && odesc.aux0 != coff_encode) { + odesc.aux1 = imm_encode; + } + } + } + else { + assert(odesc.aux1==imm_encode); + } + + } + + assert(sizeof(odesc.opnds) == sizeof(oinfo.opnds)); + memcpy(odesc.opnds, oinfo.opnds, + sizeof(EncoderBase::OpndDesc) + * EncoderBase::MAX_NUM_OPCODE_OPERANDS); + odesc.roles = oinfo.roles; + odesc.first_opnd = 0; + if (odesc.opnds[0].reg != RegName_Null) { + ++odesc.first_opnd; + if (odesc.opnds[1].reg != RegName_Null) { + ++odesc.first_opnd; + } + } + + if (odesc.platf == OpcodeInfo::decoder) { + // if the opcode is only for decoding info, then do not hash it. + ++oindex; + continue; + } + + // + // check whether the operand info is a mask (i.e. r_m*). + // in this case, split the info to have separate entries for 'r' + // and for 'm'. + // the good news is that there can be only one such operand. 
+ // + int opnd2split = -1; + for (unsigned k=0; k<oinfo.roles.count; k++) { + if ((oinfo.opnds[k].kind & OpndKind_Mem) && + (OpndKind_Mem != oinfo.opnds[k].kind)) { + opnd2split = k; + break; + } + }; + + if (opnd2split == -1) { + // not a mask, hash it, store it, continue. + unsigned short hash = getHash(&oinfo); + opcodesHashMap[minfo->mn][hash] = (unsigned char)oindex; + ++oindex; + continue; + }; + + OpcodeInfo storeItem = oinfo; + unsigned short hash; + + // remove the memory part of the mask, and store only 'r' part + storeItem.opnds[opnd2split].kind = (OpndKind)(storeItem.opnds[opnd2split].kind & ~OpndKind_Mem); + hash = getHash(&storeItem); + if (opcodesHashMap[minfo->mn][hash] == NOHASH) { + opcodesHashMap[minfo->mn][hash] = (unsigned char)oindex; + } + // else { + // do not overwrite if there is something there, just check that operands match + // the reason is that for some instructions there are several possibilities: + // say 'DEC r' may be encode as either '48+r' or 'FF /1', and I believe + // the first one is better for 'dec r'. + // as we're currently processing an opcode with memory part in operand, + // leave already filled items intact, so if there is 'OP reg' there, this + // better choice will be left in the table instead of 'OP r_m' + // } + + // compute hash of memory-based operand, 'm' part in 'r_m' + storeItem.opnds[opnd2split].kind = OpndKind_Mem; + hash = getHash(&storeItem); + // should not happen: for the r_m opcodes, there is a possibility + // that hash value of 'r' part intersects with 'OP r' value, but it's + // impossible for 'm' part. 
+ assert(opcodesHashMap[minfo->mn][hash] == NOHASH); + opcodesHashMap[minfo->mn][hash] = (unsigned char)oindex; + + ++oindex; + } +} + +ENCODER_NAMESPACE_END diff --git a/libpixelflinger/codeflinger/x86/libenc/enc_wrapper.cpp b/libpixelflinger/codeflinger/x86/libenc/enc_wrapper.cpp new file mode 100644 index 0000000..b8abffe --- /dev/null +++ b/libpixelflinger/codeflinger/x86/libenc/enc_wrapper.cpp @@ -0,0 +1,836 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <stdio.h> +#include <assert.h> +#include <limits.h> +#include "enc_base.h" +#include "enc_wrapper.h" +#include "dec_base.h" +#include "utils/Log.h" + +//#define PRINT_ENCODER_STREAM +bool dump_x86_inst = false; + +/** + * @brief Provides mapping between PhysicalReg and RegName used by encoder + * @param physicalReg The physical register + * @return Returns encoder's register name + */ +static RegName mapFromPhysicalReg (int physicalReg) +{ + RegName reg = RegName_Null; + + //Get mapping between PhysicalReg and RegName + switch (physicalReg) + { + case PhysicalReg_EAX: + reg = RegName_EAX; + break; + case PhysicalReg_EBX: + reg = RegName_EBX; + break; + case PhysicalReg_ECX: + reg = RegName_ECX; + break; + case PhysicalReg_EDX: + reg = RegName_EDX; + break; + case PhysicalReg_EDI: + reg = RegName_EDI; + break; + case PhysicalReg_ESI: + reg = RegName_ESI; + break; + case PhysicalReg_ESP: + reg = RegName_ESP; + break; + case PhysicalReg_EBP: + reg = RegName_EBP; + break; + case PhysicalReg_XMM0: + reg = RegName_XMM0; + break; + case PhysicalReg_XMM1: + reg = RegName_XMM1; + break; + case PhysicalReg_XMM2: + reg = RegName_XMM2; + break; + case PhysicalReg_XMM3: + reg = RegName_XMM3; + break; + case PhysicalReg_XMM4: + reg = RegName_XMM4; + break; + case PhysicalReg_XMM5: + reg = RegName_XMM5; + break; + case PhysicalReg_XMM6: + reg = RegName_XMM6; + break; + case PhysicalReg_XMM7: + reg = RegName_XMM7; + break; + default: + //We have no mapping + reg = RegName_Null; + break; + } + + return reg; +} + +//getRegSize, getAliasReg: +//OpndSize, RegName, OpndExt: enum enc_defs.h +inline void add_r(EncoderBase::Operands & args, int physicalReg, OpndSize sz, OpndExt ext = OpndExt_None) { + if (sz == OpndSize_128) + { + //For xmm registers, the encoder table contains them as 64-bit operands. Since semantics are determined + //by the encoding of the mnemonic, we change the size to 64-bit to make encoder happy. 
It will still + //generate the code for 128-bit size since for 64-bit all instructions have different encoding to use mmx. + sz = OpndSize_64; + } + + RegName reg = mapFromPhysicalReg (physicalReg); + if (sz != getRegSize(reg)) { + reg = getAliasReg(reg, sz); + } + args.add(EncoderBase::Operand(reg, ext)); +} +inline void add_m(EncoderBase::Operands & args, int baseReg, int disp, OpndSize sz, OpndExt ext = OpndExt_None) { + if (sz == OpndSize_128) + { + //For xmm registers, the encoder table contains them as 64-bit operands. Since semantics are determined + //by the encoding of the mnemonic, we change the size to 64-bit to make encoder happy. It will still + //generate the code for 128-bit size since for 64-bit all instructions have different encoding to use mmx. + sz = OpndSize_64; + } + + args.add(EncoderBase::Operand(sz, + mapFromPhysicalReg (baseReg), + RegName_Null, 0, + disp, ext)); +} +inline void add_m_scale(EncoderBase::Operands & args, int baseReg, int indexReg, int scale, + OpndSize sz, OpndExt ext = OpndExt_None) { + if (sz == OpndSize_128) + { + //For xmm registers, the encoder table contains them as 64-bit operands. Since semantics are determined + //by the encoding of the mnemonic, we change the size to 64-bit to make encoder happy. It will still + //generate the code for 128-bit size since for 64-bit all instructions have different encoding to use mmx. + sz = OpndSize_64; + } + + args.add(EncoderBase::Operand(sz, + mapFromPhysicalReg (baseReg), + mapFromPhysicalReg (indexReg), scale, + 0, ext)); +} +inline void add_m_disp_scale(EncoderBase::Operands & args, int baseReg, int disp, int indexReg, int scale, + OpndSize sz, OpndExt ext = OpndExt_None) { + if (sz == OpndSize_128) + { + //For xmm registers, the encoder table contains them as 64-bit operands. Since semantics are determined + //by the encoding of the mnemonic, we change the size to 64-bit to make encoder happy. 
It will still + //generate the code for 128-bit size since for 64-bit all instructions have different encoding to use mmx. + sz = OpndSize_64; + } + + args.add(EncoderBase::Operand(sz, + mapFromPhysicalReg (baseReg), + mapFromPhysicalReg (indexReg), scale, + disp, ext)); +} + +inline void add_fp(EncoderBase::Operands & args, unsigned i, bool dbl) { + return args.add((RegName)( (dbl ? RegName_FP0D : RegName_FP0S) + i)); +} +inline void add_imm(EncoderBase::Operands & args, OpndSize sz, int value, bool is_signed) { + //assert(n_size != imm.get_size()); + args.add(EncoderBase::Operand(sz, value, + is_signed ? OpndExt_Signed : OpndExt_Zero)); +} + +#define MAX_DECODED_STRING_LEN 1024 +char tmpBuffer[MAX_DECODED_STRING_LEN]; + +void printOperand(const EncoderBase::Operand & opnd) { + unsigned int sz; + if(!dump_x86_inst) return; + sz = strlen(tmpBuffer); + if(opnd.size() != OpndSize_32) { + const char * opndSizeString = getOpndSizeString(opnd.size()); + + if (opndSizeString == NULL) { + // If the string that represents operand size is null it means that + // the operand size is an invalid value. Although this could be a + // problem if instruction is corrupted, technically failing to + // disassemble is not fatal. Thus, let's warn but proceed with using + // an empty string. 
+ ALOGW("JIT-WARNING: Cannot decode instruction operand size."); + opndSizeString = ""; + } + + sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN - sz, "%s ", + opndSizeString); + } + if(opnd.is_mem()) { + if(opnd.scale() != 0) { + sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, + "%d(%s,%s,%d)", opnd.disp(), + getRegNameString(opnd.base()), + getRegNameString(opnd.index()), opnd.scale()); + } else { + sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, "%d(%s)", + opnd.disp(), getRegNameString(opnd.base())); + } + } + if(opnd.is_imm()) { + sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, "#%x", + (int)opnd.imm()); + } + if(opnd.is_reg()) { + sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, "%s", + getRegNameString(opnd.reg())); + } +} +//TODO: the order of operands +//to make the printout have the same order as assembly in .S +//I reverse the order here +void printDecoderInst(Inst & decInst) { + unsigned int sz; + if(!dump_x86_inst) return; + sz = strlen(tmpBuffer); + sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, "%s ", + EncoderBase::toStr(decInst.mn)); + for(unsigned int k = 0; k < decInst.argc; k++) { + if(k > 0) { + sz = strlen(tmpBuffer); + sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, ", "); + } + printOperand(decInst.operands[decInst.argc-1-k]); + } + ALOGE("%s", tmpBuffer); +} +void printOperands(EncoderBase::Operands& opnds) { + unsigned int sz; + if(!dump_x86_inst) return; + for(unsigned int k = 0; k < opnds.count(); k++) { + if(k > 0) { + sz = strlen(tmpBuffer); + sz += snprintf(&tmpBuffer[sz], MAX_DECODED_STRING_LEN-sz, ", "); + } + printOperand(opnds[opnds.count()-1-k]); + } +} +void printEncoderInst(Mnemonic m, EncoderBase::Operands& opnds) { + if(!dump_x86_inst) return; + snprintf(tmpBuffer, MAX_DECODED_STRING_LEN, "--- ENC %s ", + EncoderBase::toStr(m)); + printOperands(opnds); + ALOGE("%s", tmpBuffer); +} +int decodeThenPrint(char* stream_start) { + if(!dump_x86_inst) return 0; + 
snprintf(tmpBuffer, MAX_DECODED_STRING_LEN, "--- INST @ %p: ", + stream_start); + Inst decInst; + unsigned numBytes = DecoderBase::decode(stream_start, &decInst); + printDecoderInst(decInst); + return numBytes; +} + +extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm(Mnemonic m, OpndSize size, int imm, char * stream) { + EncoderBase::Operands args; + //assert(imm.get_size() == size_32); + add_imm(args, size, imm, true/*is_signed*/); +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + stream = (char *)EncoderBase::encode(stream, m, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(m, args); + decodeThenPrint(stream_start); +#endif + return stream; +} +extern "C" ENCODER_DECLARE_EXPORT unsigned encoder_get_inst_size(char * stream) { + Inst decInst; + unsigned numBytes = DecoderBase::decode(stream, &decInst); + return numBytes; +} + +extern "C" ENCODER_DECLARE_EXPORT uintptr_t encoder_get_cur_operand_offset(int opnd_id) +{ + return (uintptr_t)EncoderBase::getOpndLocation(opnd_id); +} + +extern "C" ENCODER_DECLARE_EXPORT char * encoder_update_imm(int imm, char * stream) { + Inst decInst; + EncoderBase::Operands args; + + //Decode the instruction + DecoderBase::decode(stream, &decInst); + + add_imm(args, decInst.operands[0].size(), imm, true/*is_signed*/); + char* stream_next = (char *)EncoderBase::encode(stream, decInst.mn, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(decInst.mn, args); + decodeThenPrint(stream); +#endif + return stream_next; +} +extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem(Mnemonic m, OpndSize size, + int disp, int base_reg, bool isBasePhysical, char * stream) { + EncoderBase::Operands args; + add_m(args, base_reg, disp, size); +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + stream = (char *)EncoderBase::encode(stream, m, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(m, args); + decodeThenPrint(stream_start); +#endif + return stream; +} +extern "C" ENCODER_DECLARE_EXPORT char * 
encoder_reg(Mnemonic m, OpndSize size, + int reg, bool isPhysical, LowOpndRegType type, char * stream) { + EncoderBase::Operands args; + if(m == Mnemonic_DIV || m == Mnemonic_IDIV || m == Mnemonic_MUL || m == Mnemonic_IMUL) { + add_r(args, 0/*eax*/, size); + add_r(args, 3/*edx*/, size); + } + add_r(args, reg, size); +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + stream = (char *)EncoderBase::encode(stream, m, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(m, args); + decodeThenPrint(stream_start); +#endif + return stream; +} +//! \brief Allows for different operand sizes +extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_reg(Mnemonic m, OpndSize size, + int imm, int reg, bool isPhysical, LowOpndRegType type, char * stream) { + return encoder_imm_reg_diff_sizes(m, size, imm, size, reg, isPhysical, type, stream); +} +extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg_reg_diff_sizes(Mnemonic m, OpndSize srcOpndSize, + int reg, bool isPhysical, OpndSize destOpndSize, + int reg2, bool isPhysical2, LowOpndRegType type, char * stream) { + if((m == Mnemonic_MOV || m == Mnemonic_MOVQ || m == Mnemonic_MOVD) && reg == reg2) return stream; + EncoderBase::Operands args; + add_r(args, reg2, destOpndSize); //destination + if(m == Mnemonic_SAL || m == Mnemonic_SHR || m == Mnemonic_SHL || m == Mnemonic_SAR) + add_r(args, reg, OpndSize_8); + else + add_r(args, reg, srcOpndSize); +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + stream = (char *)EncoderBase::encode(stream, m, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(m, args); + decodeThenPrint(stream_start); +#endif + return stream; +} +//both operands have same size +extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg_reg(Mnemonic m, OpndSize size, + int reg, bool isPhysical, + int reg2, bool isPhysical2, LowOpndRegType type, char * stream) { + return encoder_reg_reg_diff_sizes(m, size, reg, isPhysical, size, reg2, isPhysical2, type, stream); +} +//! 
\brief Allows for different operand sizes +extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_to_reg_diff_sizes(Mnemonic m, OpndSize memOpndSize, + int disp, int base_reg, bool isBasePhysical, OpndSize regOpndSize, + int reg, bool isPhysical, LowOpndRegType type, char * stream) { + EncoderBase::Operands args; + add_r(args, reg, regOpndSize); + add_m(args, base_reg, disp, memOpndSize); +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + stream = (char *)EncoderBase::encode(stream, m, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(m, args); + decodeThenPrint(stream_start); +#endif + return stream; +} +extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_reg(Mnemonic m, OpndSize size, + int disp, int base_reg, bool isBasePhysical, + int reg, bool isPhysical, LowOpndRegType type, char * stream) { + return encoder_mem_to_reg_diff_sizes(m, size, disp, base_reg, isBasePhysical, size, reg, isPhysical, type, stream); +} +extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_scale_reg(Mnemonic m, OpndSize size, + int base_reg, bool isBasePhysical, int index_reg, bool isIndexPhysical, int scale, + int reg, bool isPhysical, LowOpndRegType type, char * stream) { + EncoderBase::Operands args; + add_r(args, reg, size); + add_m_scale(args, base_reg, index_reg, scale, size); +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + stream = (char *)EncoderBase::encode(stream, m, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(m, args); + decodeThenPrint(stream_start); +#endif + return stream; +} +extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg_mem_scale(Mnemonic m, OpndSize size, + int reg, bool isPhysical, + int base_reg, bool isBasePhysical, int index_reg, bool isIndexPhysical, int scale, + LowOpndRegType type, char * stream) { + EncoderBase::Operands args; + add_m_scale(args, base_reg, index_reg, scale, size); + add_r(args, reg, size); +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + stream = (char 
*)EncoderBase::encode(stream, m, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(m, args); + decodeThenPrint(stream_start); +#endif + return stream; +} +//! \brief Allows for different operand sizes +extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_to_reg_diff_sizes(Mnemonic m, OpndSize memOpndSize, + int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale, + OpndSize regOpndSize, int reg, bool isPhysical, LowOpndRegType type, char * stream) { + EncoderBase::Operands args; + add_r(args, reg, regOpndSize); + add_m_disp_scale(args, base_reg, disp, index_reg, scale, memOpndSize); +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + stream = (char *)EncoderBase::encode(stream, m, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(m, args); + decodeThenPrint(stream_start); +#endif + return stream; +} +extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_reg(Mnemonic m, OpndSize size, + int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale, + int reg, bool isPhysical, LowOpndRegType type, char * stream) { + return encoder_mem_disp_scale_to_reg_diff_sizes(m, size, base_reg, isBasePhysical, + disp, index_reg, isIndexPhysical, scale, size, reg, isPhysical, + type, stream); +} +extern "C" ENCODER_DECLARE_EXPORT char * encoder_movzs_mem_disp_scale_reg(Mnemonic m, OpndSize size, + int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale, + int reg, bool isPhysical, LowOpndRegType type, char * stream) { + EncoderBase::Operands args; + add_r(args, reg, OpndSize_32); + add_m_disp_scale(args, base_reg, disp, index_reg, scale, size); +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + stream = (char *)EncoderBase::encode(stream, m, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(m, args); + decodeThenPrint(stream_start); +#endif + return stream; +} +extern "C" ENCODER_DECLARE_EXPORT char* 
encoder_reg_mem_disp_scale(Mnemonic m, OpndSize size, + int reg, bool isPhysical, + int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale, + LowOpndRegType type, char* stream) { + EncoderBase::Operands args; + add_m_disp_scale(args, base_reg, disp, index_reg, scale, size); + add_r(args, reg, size); +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + stream = (char *)EncoderBase::encode(stream, m, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(m, args); + decodeThenPrint(stream_start); +#endif + return stream; +} + +extern "C" ENCODER_DECLARE_EXPORT char * encoder_reg_mem(Mnemonic m, OpndSize size, + int reg, bool isPhysical, + int disp, int base_reg, bool isBasePhysical, LowOpndRegType type, char * stream) { + EncoderBase::Operands args; + add_m(args, base_reg, disp, size); + add_r(args, reg, size); +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + if (m == Mnemonic_CMPXCHG ){ + //CMPXCHG require EAX as args + add_r(args,PhysicalReg_EAX,size); + //Add lock prefix for CMPXCHG, guarantee the atomic of CMPXCHG in multi-core platform + stream = (char *)EncoderBase::prefix(stream, InstPrefix_LOCK); + } + stream = (char *)EncoderBase::encode(stream, m, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(m, args); + decodeThenPrint(stream_start); +#endif + return stream; +} +extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_reg_diff_sizes (Mnemonic m, OpndSize sizeImm, int imm, + OpndSize sizeReg, int reg, bool isPhysical, LowOpndRegType type, char * stream) +{ + //Create the operands + EncoderBase::Operands args; + //Add destination register + add_r (args, reg, sizeReg); + //For imul, we need to add implicit register explicitly + if (m == Mnemonic_IMUL) + { + add_r (args, reg, sizeReg); + } + //Finally add the immediate + add_imm (args, sizeImm, imm, true/*is_signed*/); + +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + + //Now do the encoding + stream = 
EncoderBase::encode (stream, m, args); + +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(m, args); + decodeThenPrint(stream_start); +#endif + + return stream; +} +extern "C" ENCODER_DECLARE_EXPORT char * encoder_update_imm_rm(int imm, char * stream) { + Inst decInst; + EncoderBase::Operands args; + + //Decode the instruction + DecoderBase::decode(stream, &decInst); + + args.add(decInst.operands[0]); + add_imm(args, decInst.operands[1].size(), imm, true/*is_signed*/); + char* stream_next = (char *)EncoderBase::encode(stream, decInst.mn, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(decInst.mn, args); + decodeThenPrint(stream); +#endif + return stream_next; +} + +extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_mem(Mnemonic m, OpndSize size, + int imm, + int disp, int base_reg, bool isBasePhysical, char * stream) { + return encoder_imm_mem_diff_sizes(m, size, imm, size, disp, base_reg, isBasePhysical, stream); +} + +extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_mem_diff_sizes (Mnemonic m, OpndSize immOpndSize, int imm, + OpndSize memOpndSize, int disp, int baseRegister, bool isBasePhysical, char * stream) +{ + //Add operands + EncoderBase::Operands args; + add_m (args, baseRegister, disp, memOpndSize); + add_imm (args, immOpndSize, imm, true); + +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + + //Do the encoding + stream = EncoderBase::encode (stream, m, args); + +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(m, args); + decodeThenPrint(stream_start); +#endif + + return stream; +} + +extern "C" ENCODER_DECLARE_EXPORT char * encoder_fp_mem(Mnemonic m, OpndSize size, int reg, + int disp, int base_reg, bool isBasePhysical, char * stream) { + EncoderBase::Operands args; + add_m(args, base_reg, disp, size); + // a fake FP register as operand + add_fp(args, reg, size == OpndSize_64/*is_double*/); +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + stream = (char *)EncoderBase::encode(stream, m, args); +#ifdef 
PRINT_ENCODER_STREAM + printEncoderInst(m, args); + decodeThenPrint(stream_start); +#endif + return stream; +} +extern "C" ENCODER_DECLARE_EXPORT char * encoder_mem_fp(Mnemonic m, OpndSize size, + int disp, int base_reg, bool isBasePhysical, + int reg, char * stream) { + EncoderBase::Operands args; + // a fake FP register as operand + add_fp(args, reg, size == OpndSize_64/*is_double*/); + add_m(args, base_reg, disp, size); +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + stream = (char *)EncoderBase::encode(stream, m, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(m, args); + decodeThenPrint(stream_start); +#endif + return stream; +} + +extern "C" ENCODER_DECLARE_EXPORT char * encoder_return(char * stream) { + EncoderBase::Operands args; +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + stream = (char *)EncoderBase::encode(stream, Mnemonic_RET, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(Mnemonic_RET, args); + decodeThenPrint(stream_start); +#endif + return stream; +} +extern "C" ENCODER_DECLARE_EXPORT char * encoder_compare_fp_stack(bool pop, int reg, bool isDouble, char * stream) { + Mnemonic m = pop ? Mnemonic_FUCOMIP : Mnemonic_FUCOMI; + //a single operand or 2 operands? + //FST ST(i) has a single operand in encoder.inl? 
+ EncoderBase::Operands args; + add_fp(args, reg, isDouble); +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + stream = (char *)EncoderBase::encode(stream, m, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(m, args); + decodeThenPrint(stream_start); +#endif + return stream; +} +extern "C" ENCODER_DECLARE_EXPORT char * encoder_movez_mem_to_reg(OpndSize size, + int disp, int base_reg, bool isBasePhysical, + int reg, bool isPhysical, char * stream) { + EncoderBase::Operands args; + add_r(args, reg, OpndSize_32); + add_m(args, base_reg, disp, size); +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + stream = (char *)EncoderBase::encode(stream, Mnemonic_MOVZX, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(Mnemonic_MOVZX, args); + decodeThenPrint(stream_start); +#endif + return stream; +} +extern "C" ENCODER_DECLARE_EXPORT char * encoder_moves_mem_to_reg(OpndSize size, + int disp, int base_reg, bool isBasePhysical, + int reg, bool isPhysical, char * stream) { + EncoderBase::Operands args; + add_r(args, reg, OpndSize_32); + add_m(args, base_reg, disp, size); +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + stream = (char *)EncoderBase::encode(stream, Mnemonic_MOVSX, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(Mnemonic_MOVSX, args); + decodeThenPrint(stream_start); +#endif + return stream; +} +extern "C" ENCODER_DECLARE_EXPORT char * encoder_movez_reg_to_reg(OpndSize size, + int reg, bool isPhysical, int reg2, + bool isPhysical2, LowOpndRegType type, char * stream) { + EncoderBase::Operands args; + add_r(args, reg2, OpndSize_32); //destination + add_r(args, reg, size); +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + stream = (char *)EncoderBase::encode(stream, Mnemonic_MOVZX, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(Mnemonic_MOVZX, args); + decodeThenPrint(stream_start); +#endif + return stream; +} +extern "C" ENCODER_DECLARE_EXPORT char * 
encoder_moves_reg_to_reg(OpndSize size, + int reg, bool isPhysical,int reg2, + bool isPhysical2, LowOpndRegType type, char * stream) { + EncoderBase::Operands args; + add_r(args, reg2, OpndSize_32); //destination + add_r(args, reg, size); +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + stream = (char *)EncoderBase::encode(stream, Mnemonic_MOVSX, args); +#ifdef PRINT_ENCODER_STREAM + printEncoderInst(Mnemonic_MOVSX, args); + decodeThenPrint(stream_start); +#endif + return stream; +} + +extern "C" ENCODER_DECLARE_EXPORT char * encoder_imm_reg_reg (Mnemonic m, int imm, OpndSize immediateSize, + int sourceReg, OpndSize sourceRegSize, int destReg, OpndSize destRegSize, char * stream) +{ + EncoderBase::Operands args; + + //Add the source and destination registers + add_r (args, destReg, destRegSize); + add_r (args, sourceReg, sourceRegSize); + + //Now add the immediate. We expect in three operand situation that immediate is last argument + add_imm (args, immediateSize, imm, true/*is_signed*/); + +#ifdef PRINT_ENCODER_STREAM + char* stream_start = stream; +#endif + + //Do the actual encoding + stream = EncoderBase::encode (stream, m, args); + +#ifdef PRINT_ENCODER_STREAM + printEncoderInst (m, args); + decodeThenPrint (stream_start); +#endif + + //Return the updated stream pointer + return stream; +} + +/** + * @brief Generates variable sized nop instructions. + * @param numBytes Number of bytes for the nop instruction. If this value is + * larger than 9 bytes, more than one nop instruction will be generated. + * @param stream Instruction stream where to place the nops + * @return Updated instruction stream pointer after generating the nops + */ +extern "C" ENCODER_DECLARE_EXPORT char * encoder_nops(unsigned numBytes, char * stream) { + return EncoderBase::nops(stream, numBytes); +} + +// Disassemble the operand "opnd" and put the readable format in "strbuf" +// up to a string length of "len". 
+unsigned int DisassembleOperandToBuf(const EncoderBase::Operand& opnd, char* strbuf, unsigned int len) +{ + unsigned int sz = 0; + if(opnd.size() != OpndSize_32) { + const char * opndSizeString = getOpndSizeString(opnd.size()); + + if (opndSizeString == NULL) { + // If the string that represents operand size is null it means that + // the operand size is an invalid value. Although this could be a + // problem if instruction is corrupted, technically failing to + // disassemble is not fatal. Thus, let's warn but proceed with using + // an empty string. + ALOGW("JIT-WARNING: Cannot decode instruction operand size."); + opndSizeString = ""; + } + + sz += snprintf(&strbuf[sz], len-sz, "%s ", opndSizeString); + } + if(opnd.is_mem()) { + if(opnd.scale() != 0) { + sz += snprintf(&strbuf[sz], len-sz, "%d(%s,%s,%d)", opnd.disp(), + getRegNameString(opnd.base()), + getRegNameString(opnd.index()), opnd.scale()); + } else { + sz += snprintf(&strbuf[sz], len-sz, "%d(%s)", + opnd.disp(), getRegNameString(opnd.base())); + } + } else if(opnd.is_imm()) { + sz += snprintf(&strbuf[sz], len-sz, "#%x", (int)opnd.imm()); + } else if(opnd.is_reg()) { + sz += snprintf(&strbuf[sz], len-sz, "%s", + getRegNameString(opnd.reg())); + } + return sz; +} + +// Disassemble the instruction "decInst" and put the readable format +// in "strbuf" up to a string length of "len". +void DisassembleInstToBuf(Inst& decInst, char* strbuf, unsigned int len) +{ + unsigned int sz = 0; + int k; + sz += snprintf(&strbuf[sz], len-sz, "%s ", EncoderBase::toStr(decInst.mn)); + if (decInst.argc > 0) { + sz += DisassembleOperandToBuf(decInst.operands[decInst.argc-1], + &strbuf[sz], len-sz); + for(k = decInst.argc-2; k >= 0; k--) { + sz += snprintf(&strbuf[sz], len-sz, ", "); + sz += DisassembleOperandToBuf(decInst.operands[k], &strbuf[sz], len-sz); + } + } +} + +// Disassmble the x86 instruction pointed to by code pointer "stream." +// Put the disassemble text in the "strbuf" up to string length "len". 
+// Return the code pointer after the disassemble x86 instruction. +extern "C" ENCODER_DECLARE_EXPORT +char* decoder_disassemble_instr(char* stream, char* strbuf, unsigned int len) +{ + Inst decInst; + unsigned numBytes = DecoderBase::decode(stream, &decInst); + DisassembleInstToBuf(decInst, strbuf, len); + return (stream + numBytes); +} + +/** + * @brief Physical register char* counterparts + */ +static const char * PhysicalRegString[] = { "eax", "ebx", "ecx", "edx", "edi", + "esi", "esp", "ebp", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", + "xmm6", "xmm7", "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7", + "null" + }; + +/** + * @brief Scratch register char* counterparts + */ +static const char * ScratchRegString[] = { "scratch1", "scratch2", "scratch3", + "scratch4", "scratch5", "scratch6", "scratch7", "scratch8", "scratch9", + "scratch10" }; + +extern "C" ENCODER_DECLARE_EXPORT +/** + * @brief Transform a physical register into its char* counterpart + * @param reg the PhysicalReg we want to have a char* equivalent + * @return the register reg in char* form + */ +const char * physicalRegToString(PhysicalReg reg) +{ + if (reg < PhysicalReg_Null) { + return PhysicalRegString[reg]; + } else if (reg >= PhysicalReg_SCRATCH_1 && reg <= PhysicalReg_SCRATCH_10) { + return ScratchRegString[reg - PhysicalReg_SCRATCH_1]; + } else if (reg == PhysicalReg_Null) { + return "null"; + } else { + return "corrupted-data"; + } +} diff --git a/libpixelflinger/codeflinger/x86/libenc/enc_wrapper.h b/libpixelflinger/codeflinger/x86/libenc/enc_wrapper.h new file mode 100644 index 0000000..3d2e68d --- /dev/null +++ b/libpixelflinger/codeflinger/x86/libenc/enc_wrapper.h @@ -0,0 +1,283 @@ +/* + * Copyright (C) 2012 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _VM_ENC_WRAPPER_H_ +#define _VM_ENC_WRAPPER_H_ + +#include "enc_defs_ext.h" + +extern bool dump_x86_inst; +typedef enum PhysicalReg { + // Currently initializing StartOfGPMarker to be 0 in order to match + // register index in Reg_No. However, ideally PhysicalReg_Null should + // be 0 and the rest moved over. + PhysicalReg_StartOfGPMarker = 0, + PhysicalReg_EAX = PhysicalReg_StartOfGPMarker, + PhysicalReg_EBX, PhysicalReg_ECX, PhysicalReg_EDX, + PhysicalReg_EDI, PhysicalReg_ESI, PhysicalReg_ESP, PhysicalReg_EBP, + PhysicalReg_EndOfGPMarker = PhysicalReg_EBP, + + PhysicalReg_StartOfXmmMarker, + PhysicalReg_XMM0 = PhysicalReg_StartOfXmmMarker, + PhysicalReg_XMM1, PhysicalReg_XMM2, PhysicalReg_XMM3, + PhysicalReg_XMM4, PhysicalReg_XMM5, PhysicalReg_XMM6, PhysicalReg_XMM7, + PhysicalReg_EndOfXmmMarker = PhysicalReg_XMM7, + + PhysicalReg_StartOfX87Marker, + PhysicalReg_ST0 = PhysicalReg_StartOfX87Marker, PhysicalReg_ST1, + PhysicalReg_ST2, PhysicalReg_ST3, PhysicalReg_ST4, PhysicalReg_ST5, + PhysicalReg_ST6, PhysicalReg_ST7, + PhysicalReg_EndOfX87Marker = PhysicalReg_ST7, + + PhysicalReg_Null, + //used as scratch logical register in NCG O1 + //should not overlap with regular logical register, start from 100 + PhysicalReg_SCRATCH_1 = 100, PhysicalReg_SCRATCH_2, PhysicalReg_SCRATCH_3, PhysicalReg_SCRATCH_4, + PhysicalReg_SCRATCH_5, PhysicalReg_SCRATCH_6, PhysicalReg_SCRATCH_7, PhysicalReg_SCRATCH_8, + PhysicalReg_SCRATCH_9, PhysicalReg_SCRATCH_10, + + //This should be the last entry + PhysicalReg_Last = PhysicalReg_SCRATCH_10 +} 
PhysicalReg; + +typedef enum Reg_No { +#ifdef _EM64T_ + rax_reg = 0,rbx_reg, rcx_reg, rdx_reg, + rdi_reg, rsi_reg, rsp_reg, rbp_reg, + r8_reg, r9_reg, r10_reg, r11_reg, + r12_reg, r13_reg, r14_reg, r15_reg, + xmm0_reg, xmm1_reg, xmm2_reg, xmm3_reg, + xmm4_reg, xmm5_reg, xmm6_reg, xmm7_reg, + xmm8_reg, xmm9_reg, xmm10_reg, xmm11_reg, + xmm12_reg, xmm13_reg, xmm14_reg, xmm15_reg, + +#else // !defined(_EM64T_) + + eax_reg = 0,ebx_reg, ecx_reg, edx_reg, + edi_reg, esi_reg, esp_reg, ebp_reg, + xmm0_reg, xmm1_reg, xmm2_reg, xmm3_reg, + xmm4_reg, xmm5_reg, xmm6_reg, xmm7_reg, + fs_reg, +#endif + /** @brief Total number of registers.*/ + n_reg +} Reg_No; +// +// instruction operand sizes: 8,16,32,64 bits +// +typedef enum Opnd_Size { + size_8 = 0, + size_16, + size_32, + size_64, + n_size, +#ifdef _EM64T_ + size_platf = size_64 +#else + size_platf = size_32 +#endif +} Opnd_Size; + +// +// opcodes for alu instructions +// +typedef enum ALU_Opcode { + add_opc = 0,or_opc, adc_opc, sbb_opc, + and_opc, sub_opc, xor_opc, cmp_opc, + mul_opc, imul_opc, div_opc, idiv_opc, + sll_opc, srl_opc, sra_opc, //shift right arithmetic + shl_opc, shr_opc, + sal_opc, sar_opc, + neg_opc, not_opc, andn_opc, + n_alu +} ALU_Opcode; + +typedef enum ConditionCode { + Condition_O = 0, + Condition_NO = 1, + Condition_B = 2, + Condition_NAE = Condition_B, + Condition_C = Condition_B, + Condition_NB = 3, + Condition_AE = Condition_NB, + Condition_NC = Condition_NB, + Condition_Z = 4, + Condition_E = Condition_Z, + Condition_NZ = 5, + Condition_NE = Condition_NZ, + Condition_BE = 6, + Condition_NA = Condition_BE, + Condition_NBE = 7, + Condition_A = Condition_NBE, + + Condition_S = 8, + Condition_NS = 9, + Condition_P = 10, + Condition_PE = Condition_P, + Condition_NP = 11, + Condition_PO = Condition_NP, + Condition_L = 12, + Condition_NGE = Condition_L, + Condition_NL = 13, + Condition_GE = Condition_NL, + Condition_LE = 14, + Condition_NG = Condition_LE, + Condition_NLE = 15, + Condition_G = 
Condition_NLE, + Condition_Count = 16 +} ConditionCode; + +// +// prefix code +// +typedef enum InstrPrefix { + no_prefix, + lock_prefix = 0xF0, + hint_branch_taken_prefix = 0x2E, + hint_branch_not_taken_prefix = 0x3E, + prefix_repne = 0xF2, + prefix_repnz = prefix_repne, + prefix_repe = 0xF3, + prefix_repz = prefix_repe, + prefix_rep = 0xF3, + prefix_cs = 0x2E, + prefix_ss = 0x36, + prefix_ds = 0x3E, + prefix_es = 0x26, + prefix_fs = 0x64, + prefix_gs = 0x65 +} InstrPrefix; + +enum LowOpndRegType +{ + LowOpndRegType_gp = 0, + LowOpndRegType_fs = 1, + LowOpndRegType_xmm = 2, + LowOpndRegType_fs_s = 3, + LowOpndRegType_ss = 4, + LowOpndRegType_invalid = 256, +}; + +enum LogicalRegType +{ + LogicalType_invalid = 0, + LowOpndRegType_scratch = 8, + LowOpndRegType_temp = 16, + LowOpndRegType_hard = 32, + LowOpndRegType_virtual = 64, +}; + +//if inline, separte enc_wrapper.cpp into two files, one of them is .inl +// enc_wrapper.cpp needs to handle both cases +#ifdef ENCODER_INLINE + #define ENCODER_DECLARE_EXPORT inline + #include "enc_wrapper.inl" +#else + #define ENCODER_DECLARE_EXPORT +#endif + +#ifdef __cplusplus +extern "C" +{ +#endif +ENCODER_DECLARE_EXPORT char* encoder_imm(Mnemonic m, OpndSize size, + int imm, char* stream); +ENCODER_DECLARE_EXPORT unsigned encoder_get_inst_size(char * stream); +ENCODER_DECLARE_EXPORT char* encoder_update_imm(int imm, char * stream); +ENCODER_DECLARE_EXPORT char* encoder_mem(Mnemonic m, OpndSize size, + int disp, int base_reg, bool isBasePhysical, char* stream); +ENCODER_DECLARE_EXPORT char* encoder_reg(Mnemonic m, OpndSize size, + int reg, bool isPhysical, LowOpndRegType type, char* stream); +ENCODER_DECLARE_EXPORT char* encoder_reg_reg(Mnemonic m, OpndSize size, + int reg, bool isPhysical, + int reg2, bool isPhysical2, LowOpndRegType type, char* stream); +ENCODER_DECLARE_EXPORT char* encoder_reg_reg_diff_sizes(Mnemonic m, OpndSize srcOpndSize, + int reg, bool isPhysical, OpndSize destOpndSize, + int reg2, bool isPhysical2, 
LowOpndRegType type, char* stream); +ENCODER_DECLARE_EXPORT char* encoder_mem_reg(Mnemonic m, OpndSize size, + int disp, int base_reg, bool isBasePhysical, + int reg, bool isPhysical, LowOpndRegType type, char* stream); +ENCODER_DECLARE_EXPORT char* encoder_mem_to_reg_diff_sizes(Mnemonic m, OpndSize memOpndSize, + int disp, int base_reg, bool isBasePhysical, OpndSize regOpndSize, + int reg, bool isPhysical, LowOpndRegType type, char* stream); +ENCODER_DECLARE_EXPORT char* encoder_mem_scale_reg(Mnemonic m, OpndSize size, + int base_reg, bool isBasePhysical, int index_reg, bool isIndexPhysical, int scale, + int reg, bool isPhysical, LowOpndRegType type, char* stream); +ENCODER_DECLARE_EXPORT char* encoder_reg_mem_scale(Mnemonic m, OpndSize size, + int reg, bool isPhysical, + int base_reg, bool isBasePhysical, int index_reg, bool isIndexPhysical, int scale, + LowOpndRegType type, char* stream); +ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_reg(Mnemonic m, OpndSize size, + int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale, + int reg, bool isPhysical, LowOpndRegType type, char * stream); +ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_to_reg_diff_sizes(Mnemonic m, OpndSize memOpndSize, + int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale, + OpndSize regOpndSize, int reg, bool isPhysical, LowOpndRegType type, char * stream); +ENCODER_DECLARE_EXPORT char * encoder_movzs_mem_disp_scale_reg(Mnemonic m, OpndSize size, + int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale, + int reg, bool isPhysical, LowOpndRegType type, char * stream); +ENCODER_DECLARE_EXPORT char * encoder_mem_disp_scale_to_reg_2(Mnemonic m, OpndSize memOpndSize, + int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale, + OpndSize regOpndSize, int reg, bool isPhysical, LowOpndRegType type, char * stream); +ENCODER_DECLARE_EXPORT char* 
encoder_reg_mem_disp_scale(Mnemonic m, OpndSize size, + int reg, bool isPhysical, + int base_reg, bool isBasePhysical, int disp, int index_reg, bool isIndexPhysical, int scale, + LowOpndRegType type, char* stream); +ENCODER_DECLARE_EXPORT char* encoder_reg_mem(Mnemonic m, OpndSize size, + int reg, bool isPhysical, + int disp, int base_reg, bool isBasePhysical, LowOpndRegType type, char* stream); +ENCODER_DECLARE_EXPORT char* encoder_imm_reg(Mnemonic m, OpndSize size, + int imm, int reg, bool isPhysical, LowOpndRegType type, char* stream); +ENCODER_DECLARE_EXPORT char * encoder_imm_reg_diff_sizes(Mnemonic m, OpndSize sizeImm, + int imm, OpndSize sizeReg, int reg, bool isPhysical, LowOpndRegType type, char * stream); +ENCODER_DECLARE_EXPORT char * encoder_update_imm_rm(int imm, char * stream); +ENCODER_DECLARE_EXPORT char* encoder_imm_mem(Mnemonic m, OpndSize size, + int imm, + int disp, int base_reg, bool isBasePhysical, char* stream); +ENCODER_DECLARE_EXPORT char * encoder_imm_mem_diff_sizes (Mnemonic m, OpndSize immOpndSize, int imm, + OpndSize memOpndSize, int disp, int baseRegister, bool isBasePhysical, char * stream); +ENCODER_DECLARE_EXPORT char* encoder_fp_mem(Mnemonic m, OpndSize size, int reg, + int disp, int base_reg, bool isBasePhysical, char* stream); +ENCODER_DECLARE_EXPORT char* encoder_mem_fp(Mnemonic m, OpndSize size, + int disp, int base_reg, bool isBasePhysical, + int reg, char* stream); +ENCODER_DECLARE_EXPORT char* encoder_return(char* stream); +ENCODER_DECLARE_EXPORT char* encoder_compare_fp_stack(bool pop, int reg, bool isDouble, char* stream); +ENCODER_DECLARE_EXPORT char* encoder_movez_mem_to_reg(OpndSize size, + int disp, int base_reg, bool isBasePhysical, + int reg, bool isPhysical, char* stream); +ENCODER_DECLARE_EXPORT char* encoder_moves_mem_to_reg(OpndSize size, + int disp, int base_reg, bool isBasePhysical, + int reg, bool isPhysical, char* stream); +ENCODER_DECLARE_EXPORT char * encoder_movez_reg_to_reg(OpndSize size, + int reg, bool 
isPhysical, int reg2, + bool isPhysical2, LowOpndRegType type, char * stream); +ENCODER_DECLARE_EXPORT char * encoder_moves_reg_to_reg(OpndSize size, + int reg, bool isPhysical, int reg2, + bool isPhysical2, LowOpndRegType type, char * stream); +ENCODER_DECLARE_EXPORT char * encoder_imm_reg_reg (Mnemonic m, int imm, OpndSize immediateSize, + int sourceReg, OpndSize sourceRegSize, int destReg, + OpndSize destRegSize, char * stream); +ENCODER_DECLARE_EXPORT char * encoder_nops(unsigned numBytes, char * stream); +ENCODER_DECLARE_EXPORT int decodeThenPrint(char* stream_start); +ENCODER_DECLARE_EXPORT char* decoder_disassemble_instr(char* stream, char* strbuf, unsigned int len); + +//Provide a char* equivalent to a PhysicalReg type +ENCODER_DECLARE_EXPORT const char * physicalRegToString(PhysicalReg reg); +#ifdef __cplusplus +} +#endif +#endif // _VM_ENC_WRAPPER_H_ diff --git a/libpixelflinger/codeflinger/x86/libenc/encoder.h b/libpixelflinger/codeflinger/x86/libenc/encoder.h new file mode 100644 index 0000000..9ac0219 --- /dev/null +++ b/libpixelflinger/codeflinger/x86/libenc/encoder.h @@ -0,0 +1,717 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @author Alexander V. 
Astapchuk + */ +/** + * @file + * @brief Simple interface for generating processor instructions. + * + * The interface works for both IA32 and EM64T. By default, only IA32 + * capabilities are presented. To enable EM64T feature, the _EM64T_ macro + * must be defined (and, of course, a proper library version to be used). + * + * The interface is based on the original ia32.h encoder interface, + * with some simplifications and add-ons - EM64T-specific, SSE and SSE2. + * + * The interface mostly intended for existing legacy code like LIL code + * generator. From the implementation point of view, it's just a wrapper + * around the EncoderBase functionality. + */ + +#ifndef _VM_ENCODER_H_ +#define _VM_ENCODER_H_ + +#include <limits.h> +#include "enc_base.h" +//#include "open/types.h" + +#ifdef _EM64T_ +// size of general-purpose value on the stack in bytes +#define GR_STACK_SIZE 8 +// size of floating-point value on the stack in bytes +#define FR_STACK_SIZE 8 + +#if defined(WIN32) || defined(_WIN64) + // maximum number of GP registers for inputs + const int MAX_GR = 4; + // maximum number of FP registers for inputs + const int MAX_FR = 4; + // WIN64 reserves 4 words for shadow space + const int SHADOW = 4 * GR_STACK_SIZE; +#else + // maximum number of GP registers for inputs + const int MAX_GR = 6; + // maximum number of FP registers for inputs + const int MAX_FR = 8; + // Linux x64 doesn't reserve shadow space + const int SHADOW = 0; +#endif + +#else +// size of general-purpose value on the stack in bytes +#define GR_STACK_SIZE 4 +// size of general-purpose value on the stack in bytes +#define FR_STACK_SIZE 8 + +// maximum number of GP registers for inputs +const int MAX_GR = 0; +// maximum number of FP registers for inputs +const int MAX_FR = 0; +#endif + +typedef enum Reg_No { +#ifdef _EM64T_ + rax_reg = 0,rbx_reg, rcx_reg, rdx_reg, + rdi_reg, rsi_reg, rsp_reg, rbp_reg, + r8_reg, r9_reg, r10_reg, r11_reg, + r12_reg, r13_reg, r14_reg, r15_reg, + xmm0_reg, xmm1_reg, 
xmm2_reg, xmm3_reg, + xmm4_reg, xmm5_reg, xmm6_reg, xmm7_reg, + xmm8_reg, xmm9_reg, xmm10_reg, xmm11_reg, + xmm12_reg, xmm13_reg, xmm14_reg, xmm15_reg, + +#else // !defined(_EM64T_) + + eax_reg = 0,ebx_reg, ecx_reg, edx_reg, + edi_reg, esi_reg, esp_reg, ebp_reg, + xmm0_reg, xmm1_reg, xmm2_reg, xmm3_reg, + xmm4_reg, xmm5_reg, xmm6_reg, xmm7_reg, + fs_reg, +#endif + /** @brief Total number of registers.*/ + n_reg +} Reg_No; +// +// instruction operand sizes: 8,16,32,64 bits +// +typedef enum Opnd_Size { + size_8 = 0, + size_16, + size_32, + size_64, + n_size, +#ifdef _EM64T_ + size_platf = size_64 +#else + size_platf = size_32 +#endif +} Opnd_Size; + +// +// opcodes for alu instructions +// +typedef enum ALU_Opcode { + add_opc = 0,or_opc, adc_opc, sbb_opc, + and_opc, sub_opc, xor_opc, cmp_opc, + n_alu +} ALU_Opcode; + +// +// opcodes for shift instructions +// +typedef enum Shift_Opcode { + shld_opc, shrd_opc, shl_opc, shr_opc, + sar_opc, ror_opc, max_shift_opcode=6, n_shift = 6 +} Shift_Opcode; + +typedef enum ConditionCode { + Condition_O = 0, + Condition_NO = 1, + Condition_B = 2, + Condition_NAE = Condition_B, + Condition_C = Condition_B, + Condition_NB = 3, + Condition_AE = Condition_NB, + Condition_NC = Condition_NB, + Condition_Z = 4, + Condition_E = Condition_Z, + Condition_NZ = 5, + Condition_NE = Condition_NZ, + Condition_BE = 6, + Condition_NA = Condition_BE, + Condition_NBE = 7, + Condition_A = Condition_NBE, + + Condition_S = 8, + Condition_NS = 9, + Condition_P = 10, + Condition_PE = Condition_P, + Condition_NP = 11, + Condition_PO = Condition_NP, + Condition_L = 12, + Condition_NGE = Condition_L, + Condition_NL = 13, + Condition_GE = Condition_NL, + Condition_LE = 14, + Condition_NG = Condition_LE, + Condition_NLE = 15, + Condition_G = Condition_NLE, + Condition_Count = 16 +} ConditionCode; + +// +// prefix code +// +typedef enum InstrPrefix { + no_prefix, + lock_prefix = 0xF0, + hint_branch_taken_prefix = 0x2E, + hint_branch_not_taken_prefix = 0x3E, + 
prefix_repne = 0xF2, + prefix_repnz = prefix_repne, + prefix_repe = 0xF3, + prefix_repz = prefix_repe, + prefix_rep = 0xF3, + prefix_cs = 0x2E, + prefix_ss = 0x36, + prefix_ds = 0x3E, + prefix_es = 0x26, + prefix_fs = 0x64, + prefix_gs = 0x65 +} InstrPrefix; + + +// +// an instruction operand +// +class Opnd { + +protected: + enum Tag { SignedImm, UnsignedImm, Reg, Mem, FP, XMM }; + + const Tag tag; + + Opnd(Tag t): tag(t) {} + +public: + void * operator new(size_t, void * mem) { + return mem; + } + + void operator delete(void *) {} + + void operator delete(void *, void *) {} + +private: + // disallow copying + Opnd(const Opnd &): tag(Mem) { assert(false); } + Opnd& operator=(const Opnd &) { assert(false); return *this; } +}; +typedef int I_32; +class Imm_Opnd: public Opnd { + +protected: + union { +#ifdef _EM64T_ + int64 value; + unsigned char bytes[8]; +#else + I_32 value; + unsigned char bytes[4]; +#endif + }; + Opnd_Size size; + +public: + Imm_Opnd(I_32 val, bool isSigned = true): + Opnd(isSigned ? SignedImm : UnsignedImm), value(val), size(size_32) { + if (isSigned) { + if (CHAR_MIN <= val && val <= CHAR_MAX) { + size = size_8; + } else if (SHRT_MIN <= val && val <= SHRT_MAX) { + size = size_16; + } + } else { + assert(val >= 0); + if (val <= UCHAR_MAX) { + size = size_8; + } else if (val <= USHRT_MAX) { + size = size_16; + } + } + } + Imm_Opnd(const Imm_Opnd& that): Opnd(that.tag), value(that.value), size(that.size) {}; + +#ifdef _EM64T_ + Imm_Opnd(Opnd_Size sz, int64 val, bool isSigned = true): + Opnd(isSigned ? 
SignedImm : UnsignedImm), value(val), size(sz) { +#ifndef NDEBUG + switch (size) { + case size_8: + assert(val == (int64)(I_8)val); + break; + case size_16: + assert(val == (int64)(int16)val); + break; + case size_32: + assert(val == (int64)(I_32)val); + break; + case size_64: + break; + case n_size: + assert(false); + break; + } +#endif // NDEBUG + } + + int64 get_value() const { return value; } + +#else + + Imm_Opnd(Opnd_Size sz, I_32 val, int isSigned = true): + Opnd(isSigned ? SignedImm : UnsignedImm), value(val), size(sz) { +#ifndef NDEBUG + switch (size) { + case size_8: + assert((I_32)val == (I_32)(I_8)val); + break; + case size_16: + assert((I_32)val == (I_32)(int16)val); + break; + case size_32: + break; + case size_64: + case n_size: + assert(false); + break; + } +#endif // NDEBUG + } + + I_32 get_value() const { return value; } + +#endif + Opnd_Size get_size() const { return size; } + bool is_signed() const { return tag == SignedImm; } +}; + +class RM_Opnd: public Opnd { + +public: + bool is_reg() const { return tag != SignedImm && tag != UnsignedImm && tag != Mem; } + +protected: + RM_Opnd(Tag t): Opnd(t) {} + +private: + // disallow copying + RM_Opnd(const RM_Opnd &): Opnd(Reg) { assert(false); } +}; + +class R_Opnd: public RM_Opnd { + +protected: + Reg_No _reg_no; + +public: + R_Opnd(Reg_No r): RM_Opnd(Reg), _reg_no(r) {} + Reg_No reg_no() const { return _reg_no; } + +private: + // disallow copying + R_Opnd(const R_Opnd &): RM_Opnd(Reg) { assert(false); } +}; + +// +// a memory operand with displacement +// Can also serve as a full memory operand with base,index, displacement and scale. +// Use n_reg to specify 'no register', say, for index. 
+class M_Opnd: public RM_Opnd { + +protected: + Imm_Opnd m_disp; + Imm_Opnd m_scale; + R_Opnd m_index; + R_Opnd m_base; + +public: + //M_Opnd(Opnd_Size sz): RM_Opnd(Mem, K_M, sz), m_disp(0), m_scale(0), m_index(n_reg), m_base(n_reg) {} + M_Opnd(I_32 disp): + RM_Opnd(Mem), m_disp(disp), m_scale(0), m_index(n_reg), m_base(n_reg) {} + M_Opnd(Reg_No rbase, I_32 rdisp): + RM_Opnd(Mem), m_disp(rdisp), m_scale(0), m_index(n_reg), m_base(rbase) {} + M_Opnd(I_32 disp, Reg_No rbase, Reg_No rindex, unsigned scale): + RM_Opnd(Mem), m_disp(disp), m_scale(scale), m_index(rindex), m_base(rbase) {} + M_Opnd(const M_Opnd & that) : RM_Opnd(Mem), + m_disp((int)that.m_disp.get_value()), m_scale((int)that.m_scale.get_value()), + m_index(that.m_index.reg_no()), m_base(that.m_base.reg_no()) + {} + // + inline const R_Opnd & base(void) const { return m_base; } + inline const R_Opnd & index(void) const { return m_index; } + inline const Imm_Opnd & scale(void) const { return m_scale; } + inline const Imm_Opnd & disp(void) const { return m_disp; } +}; + +// +// a memory operand with base register and displacement +// +class M_Base_Opnd: public M_Opnd { + +public: + M_Base_Opnd(Reg_No base, I_32 disp) : M_Opnd(disp, base, n_reg, 0) {} + +private: + // disallow copying - but it leads to ICC errors #734 in encoder.inl + // M_Base_Opnd(const M_Base_Opnd &): M_Opnd(0) { assert(false); } +}; + +// +// a memory operand with base register, scaled index register +// and displacement. 
+// +class M_Index_Opnd : public M_Opnd { + +public: + M_Index_Opnd(Reg_No base, Reg_No index, I_32 disp, unsigned scale): + M_Opnd(disp, base, index, scale) {} + +private: + // disallow copying - but it leads to ICC errors #734 in encoder.inl + // M_Index_Opnd(const M_Index_Opnd &): M_Opnd(0) { assert(false); } +}; + +class XMM_Opnd : public Opnd { + +protected: + unsigned m_idx; + +public: + XMM_Opnd(unsigned _idx): Opnd(XMM), m_idx(_idx) {}; + unsigned get_idx( void ) const { return m_idx; }; + +private: + // disallow copying + XMM_Opnd(const XMM_Opnd &): Opnd(XMM) { assert(false); } +}; + +// +// operand structures for ia32 registers +// +#ifdef _EM64T_ + +extern R_Opnd rax_opnd; +extern R_Opnd rcx_opnd; +extern R_Opnd rdx_opnd; +extern R_Opnd rbx_opnd; +extern R_Opnd rdi_opnd; +extern R_Opnd rsi_opnd; +extern R_Opnd rsp_opnd; +extern R_Opnd rbp_opnd; + +extern R_Opnd r8_opnd; +extern R_Opnd r9_opnd; +extern R_Opnd r10_opnd; +extern R_Opnd r11_opnd; +extern R_Opnd r12_opnd; +extern R_Opnd r13_opnd; +extern R_Opnd r14_opnd; +extern R_Opnd r15_opnd; + +extern XMM_Opnd xmm8_opnd; +extern XMM_Opnd xmm9_opnd; +extern XMM_Opnd xmm10_opnd; +extern XMM_Opnd xmm11_opnd; +extern XMM_Opnd xmm12_opnd; +extern XMM_Opnd xmm13_opnd; +extern XMM_Opnd xmm14_opnd; +extern XMM_Opnd xmm15_opnd; +#else + +extern R_Opnd eax_opnd; +extern R_Opnd ecx_opnd; +extern R_Opnd edx_opnd; +extern R_Opnd ebx_opnd; +extern R_Opnd esp_opnd; +extern R_Opnd ebp_opnd; +extern R_Opnd esi_opnd; +extern R_Opnd edi_opnd; + +#endif // _EM64T_ + +extern XMM_Opnd xmm0_opnd; +extern XMM_Opnd xmm1_opnd; +extern XMM_Opnd xmm2_opnd; +extern XMM_Opnd xmm3_opnd; +extern XMM_Opnd xmm4_opnd; +extern XMM_Opnd xmm5_opnd; +extern XMM_Opnd xmm6_opnd; +extern XMM_Opnd xmm7_opnd; + +#ifdef NO_ENCODER_INLINE + #define ENCODER_DECLARE_EXPORT +#else + #define ENCODER_DECLARE_EXPORT inline + #include "encoder.inl" +#endif + +// prefix +ENCODER_DECLARE_EXPORT char * prefix(char * stream, InstrPrefix p); + +// stack push and 
pop instructions +ENCODER_DECLARE_EXPORT char * push(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * push(char * stream, const Imm_Opnd & imm); +ENCODER_DECLARE_EXPORT char * pop(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); + +// cmpxchg or xchg +ENCODER_DECLARE_EXPORT char * cmpxchg(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * xchg(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz = size_platf); + +// inc(rement), dec(rement), not, neg(ate) instructions +ENCODER_DECLARE_EXPORT char * inc(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * dec(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * _not(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * neg(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * nop(char * stream); +ENCODER_DECLARE_EXPORT char * int3(char * stream); + +// alu instructions: add, or, adc, sbb, and, sub, xor, cmp +ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const M_Opnd & m, const R_Opnd & r, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf); + +// test instruction +ENCODER_DECLARE_EXPORT char * test(char * stream, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * test(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz = size_platf); + +// shift instructions: shl, shr, sar, shld, shrd, ror +ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode opc, const RM_Opnd & rm, const Imm_Opnd & imm, 
Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode opc, const RM_Opnd & rm, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode opc, const RM_Opnd & rm, const R_Opnd & r, const Imm_Opnd & imm, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode opc, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz = size_platf); + +// multiply instructions: mul, imul +ENCODER_DECLARE_EXPORT char * mul(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const Imm_Opnd & imm, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const RM_Opnd & rm, const Imm_Opnd& imm, Opnd_Size sz = size_platf); + +// divide instructions: div, idiv +ENCODER_DECLARE_EXPORT char * idiv(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * div(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); + +// data movement: mov +ENCODER_DECLARE_EXPORT char * mov(char * stream, const M_Opnd & m, const R_Opnd & r, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * mov(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * mov(char * stream, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz = size_platf); + +ENCODER_DECLARE_EXPORT char * movsx( char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * movzx( char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf); + +ENCODER_DECLARE_EXPORT char * movd(char * stream, const RM_Opnd & rm, const XMM_Opnd & xmm); +ENCODER_DECLARE_EXPORT char * movd(char * stream, const XMM_Opnd & xmm, const RM_Opnd & rm); 
+ENCODER_DECLARE_EXPORT char * movq(char * stream, const RM_Opnd & rm, const XMM_Opnd & xmm); +ENCODER_DECLARE_EXPORT char * movq(char * stream, const XMM_Opnd & xmm, const RM_Opnd & rm); + +// sse mov +ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl); +ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const M_Opnd & mem, const XMM_Opnd & xmm, bool dbl); +ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl); + +// sse add, sub, mul, div +ENCODER_DECLARE_EXPORT char * sse_add(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl); +ENCODER_DECLARE_EXPORT char * sse_add(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl); + +ENCODER_DECLARE_EXPORT char * sse_sub(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl); +ENCODER_DECLARE_EXPORT char * sse_sub(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl); + +ENCODER_DECLARE_EXPORT char * sse_mul(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl); +ENCODER_DECLARE_EXPORT char * sse_mul(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl); + +ENCODER_DECLARE_EXPORT char * sse_div(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl); +ENCODER_DECLARE_EXPORT char * sse_div(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl); + +// xor, compare +ENCODER_DECLARE_EXPORT char * sse_xor(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1); + +ENCODER_DECLARE_EXPORT char * sse_compare(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl); +ENCODER_DECLARE_EXPORT char * sse_compare(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem, bool dbl); + +// sse conversions +ENCODER_DECLARE_EXPORT char * sse_cvt_si(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl); +ENCODER_DECLARE_EXPORT char * sse_cvtt2si(char * stream, const R_Opnd & reg, const 
M_Opnd & mem, bool dbl); +ENCODER_DECLARE_EXPORT char * sse_cvtt2si(char * stream, const R_Opnd & reg, const XMM_Opnd & xmm, bool dbl); +ENCODER_DECLARE_EXPORT char * sse_cvt_fp2dq(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl); +ENCODER_DECLARE_EXPORT char * sse_cvt_dq2fp(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl); +ENCODER_DECLARE_EXPORT char * sse_d2s(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem64); +ENCODER_DECLARE_EXPORT char * sse_d2s(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1); +ENCODER_DECLARE_EXPORT char * sse_s2d(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem32); +ENCODER_DECLARE_EXPORT char * sse_s2d(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1); + +// condition operations +ENCODER_DECLARE_EXPORT char * cmov(char * stream, ConditionCode cc, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * setcc(char * stream, ConditionCode cc, const RM_Opnd & rm8); + +// load effective address: lea +ENCODER_DECLARE_EXPORT char * lea(char * stream, const R_Opnd & r, const M_Opnd & m, Opnd_Size sz = size_platf); +ENCODER_DECLARE_EXPORT char * cdq(char * stream); +ENCODER_DECLARE_EXPORT char * wait(char * stream); + +// control-flow instructions +ENCODER_DECLARE_EXPORT char * loop(char * stream, const Imm_Opnd & imm); + +// jump with 8-bit relative +ENCODER_DECLARE_EXPORT char * jump8(char * stream, const Imm_Opnd & imm); + +// jump with 32-bit relative +ENCODER_DECLARE_EXPORT char * jump32(char * stream, const Imm_Opnd & imm); + +// register indirect jump +ENCODER_DECLARE_EXPORT char * jump(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); + +// jump to target address +ENCODER_DECLARE_EXPORT char *jump(char * stream, char *target); + +// jump with displacement +//char * jump(char * stream, I_32 disp); + +// conditional branch with 8-bit branch offset +ENCODER_DECLARE_EXPORT char * branch8(char * stream, 
ConditionCode cc, const Imm_Opnd & imm, InstrPrefix prefix = no_prefix); + +// conditional branch with 32-bit branch offset +ENCODER_DECLARE_EXPORT char * branch32(char * stream, ConditionCode cc, const Imm_Opnd & imm, InstrPrefix prefix = no_prefix); + +// conditional branch with target label address +//char * branch(char * stream, ConditionCode cc, const char * target, InstrPrefix prefix = no_prefix); + +// conditional branch with displacement immediate +ENCODER_DECLARE_EXPORT char * branch(char * stream, ConditionCode cc, I_32 disp, InstrPrefix prefix = no_prefix); + +// call with displacement +ENCODER_DECLARE_EXPORT char * call(char * stream, const Imm_Opnd & imm); + +// indirect call through register or memory location +ENCODER_DECLARE_EXPORT char * call(char * stream, const RM_Opnd & rm, Opnd_Size sz = size_platf); + +// call target address +ENCODER_DECLARE_EXPORT char * call(char * stream, const char * target); + +// return instruction +ENCODER_DECLARE_EXPORT char * ret(char * stream); +ENCODER_DECLARE_EXPORT char * ret(char * stream, unsigned short pop); +ENCODER_DECLARE_EXPORT char * ret(char * stream, const Imm_Opnd & imm); + +// string operations +ENCODER_DECLARE_EXPORT char * set_d(char * stream, bool set); +ENCODER_DECLARE_EXPORT char * scas(char * stream, unsigned char prefix); +ENCODER_DECLARE_EXPORT char * stos(char * stream, unsigned char prefix); + +// floating-point instructions + +// st(0) = st(0) fp_op m{32,64}real +//!char * fp_op_mem(char * stream, FP_Opcode opc,const M_Opnd& mem,int is_double); + +// st(0) = st(0) fp_op st(i) +//!char *fp_op(char * stream, FP_Opcode opc,unsigned i); + +// st(i) = st(i) fp_op st(0) ; optionally pop stack +//!char * fp_op(char * stream, FP_Opcode opc,unsigned i,unsigned pop_stk); + +// compare st(0),st(1) and pop stack twice +//!char * fcompp(char * stream); +ENCODER_DECLARE_EXPORT char * fldcw(char * stream, const M_Opnd & mem); +ENCODER_DECLARE_EXPORT char * fnstcw(char * stream, const M_Opnd & mem); 
+ENCODER_DECLARE_EXPORT char * fnstsw(char * stream); +//!char * fchs(char * stream); +//!char * frem(char * stream); +//!char * fxch(char * stream,unsigned i); +//!char * fcomip(char * stream, unsigned i); + +// load from memory (as fp) into fp register stack +ENCODER_DECLARE_EXPORT char * fld(char * stream, const M_Opnd & m, bool is_double); +//!char *fld80(char * stream,const M_Opnd& mem); + +// load from memory (as int) into fp register stack +//!char * fild(char * stream,const M_Opnd& mem,int is_long); + +// push st(i) onto fp register stack +//!char * fld(char * stream,unsigned i); + +// push the constants 0.0 and 1.0 onto the fp register stack +//!char * fldz(char * stream); +//!char * fld1(char * stream); + +// store stack to memory (as int), always popping the stack +ENCODER_DECLARE_EXPORT char * fist(char * stream, const M_Opnd & mem, bool is_long, bool pop_stk); +// store stack to to memory (as fp), optionally popping the stack +ENCODER_DECLARE_EXPORT char * fst(char * stream, const M_Opnd & m, bool is_double, bool pop_stk); +// store ST(0) to ST(i), optionally popping the stack. 
Takes 1 clock +ENCODER_DECLARE_EXPORT char * fst(char * stream, unsigned i, bool pop_stk); + +//!char * pushad(char * stream); +//!char * pushfd(char * stream); +//!char * popad(char * stream); +//!char * popfd(char * stream); + +// stack frame allocation instructions: enter & leave +// +// enter frame_size +// +// is equivalent to: +// +// push ebp +// mov ebp,esp +// sub esp,frame_size +// +//!char *enter(char * stream,const Imm_Opnd& imm); + +// leave +// is equivalent to: +// +// mov esp,ebp +// pop ebp +//!char *leave(char * stream); + +// sahf loads SF, ZF, AF, PF, and CF flags from eax +//!char *sahf(char * stream); + +// Intrinsic FP math functions + +//!char *math_fsin(char * stream); +//!char *math_fcos(char * stream); +//!char *math_fabs(char * stream); +//!char *math_fpatan(char * stream); +ENCODER_DECLARE_EXPORT char * fprem(char * stream); +ENCODER_DECLARE_EXPORT char * fprem1(char * stream); +//!char *math_frndint(char * stream); +//!char *math_fptan(char * stream); + +// +// Add 1-7 bytes padding, with as few instructions as possible, +// with no effect on the processor state (e.g., registers, flags) +// +//!char *padding(char * stream, unsigned num); + +// prolog and epilog code generation +//- char *prolog(char * stream,unsigned frame_size,unsigned reg_save_mask); +//- char *epilog(char * stream,unsigned reg_save_mask); + +//!extern R_Opnd reg_operand_array[]; + +// fsave and frstor +//!char *fsave(char * stream); +//!char *frstor(char * stream); + +// lahf : Load Status Flags into AH Register +//!char *lahf(char * stream); + +// mfence : Memory Fence +//!char *mfence(char * stream); + +#endif // _VM_ENCODER_H_ diff --git a/libpixelflinger/codeflinger/x86/libenc/encoder.inl b/libpixelflinger/codeflinger/x86/libenc/encoder.inl new file mode 100644 index 0000000..ec72097 --- /dev/null +++ b/libpixelflinger/codeflinger/x86/libenc/encoder.inl @@ -0,0 +1,863 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor 
license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @author Alexander V. Astapchuk + */ +#include <stdio.h> +#include <assert.h> +#include <limits.h> + +extern const RegName map_of_regno_2_regname[]; +extern const OpndSize map_of_EncoderOpndSize_2_RealOpndSize[]; +extern const Mnemonic map_of_alu_opcode_2_mnemonic[]; +extern const Mnemonic map_of_shift_opcode_2_mnemonic[]; + +// S_ stands for 'Signed' +extern const Mnemonic S_map_of_condition_code_2_branch_mnemonic[]; +// U_ stands for 'Unsigned' +extern const Mnemonic U_map_of_condition_code_2_branch_mnemonic[]; + +inline static RegName map_reg(Reg_No r) { + assert(r >= 0 && r <= n_reg); + return map_of_regno_2_regname[r]; +} + +inline static OpndSize map_size(Opnd_Size o_size) { + assert(o_size >= 0 && o_size <= n_size); + return map_of_EncoderOpndSize_2_RealOpndSize[o_size]; +} + +inline static Mnemonic map_alu(ALU_Opcode alu) { + assert(alu >= 0 && alu < n_alu); + return map_of_alu_opcode_2_mnemonic[alu]; +} + +inline static Mnemonic map_shift(Shift_Opcode shc) { + assert(shc >= 0 && shc < n_shift); + return map_of_shift_opcode_2_mnemonic[shc]; +} + +inline bool fit8(int64 val) { + return (CHAR_MIN <= val) && (val <= CHAR_MAX); +} + +inline bool fit32(int64 val) { + return (INT_MIN <= val) && (val <= INT_MAX); +} + +inline static void 
add_r(EncoderBase::Operands & args, const R_Opnd & r, Opnd_Size sz, OpndExt ext = OpndExt_None) { + RegName reg = map_reg(r.reg_no()); + if (sz != n_size) { + OpndSize size = map_size(sz); + if (size != getRegSize(reg)) { + reg = getAliasReg(reg, size); + } + } + args.add(EncoderBase::Operand(reg, ext)); +} + +inline static void add_m(EncoderBase::Operands & args, const M_Opnd & m, Opnd_Size sz, OpndExt ext = OpndExt_None) { + assert(n_size != sz); + args.add(EncoderBase::Operand(map_size(sz), + map_reg(m.base().reg_no()), map_reg(m.index().reg_no()), + (unsigned)m.scale().get_value(), (int)m.disp().get_value(), ext)); +} + +inline static void add_rm(EncoderBase::Operands & args, const RM_Opnd & rm, Opnd_Size sz, OpndExt ext = OpndExt_None) { + rm.is_reg() ? add_r(args, (R_Opnd &)rm, sz, ext) : add_m(args, (M_Opnd &)rm, sz, ext); +} + +inline static void add_xmm(EncoderBase::Operands & args, const XMM_Opnd & xmm, bool dbl) { + // Gregory - + // XMM registers indexes in Reg_No enum are shifted by xmm0_reg, their indexes + // don't start with 0, so it is necessary to subtract xmm0_reg index from + // xmm.get_idx() value + assert(xmm.get_idx() >= xmm0_reg); + return args.add((RegName)( (dbl ? RegName_XMM0D : RegName_XMM0S) + xmm.get_idx() - + xmm0_reg)); +} + +inline static void add_fp(EncoderBase::Operands & args, unsigned i, bool dbl) { + return args.add((RegName)( (dbl ? RegName_FP0D : RegName_FP0S) + i)); +} + +inline static void add_imm(EncoderBase::Operands & args, const Imm_Opnd & imm) { + assert(n_size != imm.get_size()); + args.add(EncoderBase::Operand(map_size(imm.get_size()), imm.get_value(), + imm.is_signed() ? 
OpndExt_Signed : OpndExt_Zero)); +} + +ENCODER_DECLARE_EXPORT char * prefix(char * stream, InstrPrefix p) { + *stream = (char)p; + return stream + 1; +} + +// stack push and pop instructions +ENCODER_DECLARE_EXPORT char * push(char * stream, const RM_Opnd & rm, Opnd_Size sz) { + EncoderBase::Operands args; + add_rm(args, rm, sz); + return (char*)EncoderBase::encode(stream, Mnemonic_PUSH, args); +} + +ENCODER_DECLARE_EXPORT char * push(char * stream, const Imm_Opnd & imm) { + EncoderBase::Operands args; +#ifdef _EM64T_ + add_imm(args, imm); +#else + // we need this workaround to be compatible with the former ia32 encoder implementation + add_imm(args, Imm_Opnd(size_32, imm.get_value())); +#endif + return EncoderBase::encode(stream, Mnemonic_PUSH, args); +} + +ENCODER_DECLARE_EXPORT char * pop(char * stream, const RM_Opnd & rm, Opnd_Size sz) { + EncoderBase::Operands args; + add_rm(args, rm, sz); + return (char*)EncoderBase::encode(stream, Mnemonic_POP, args); +} + +// cmpxchg or xchg +ENCODER_DECLARE_EXPORT char * cmpxchg(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz) { + EncoderBase::Operands args; + add_rm(args, rm, sz); + add_r(args, r, sz); + RegName implicitReg = getAliasReg(RegName_EAX, map_size(sz)); + args.add(implicitReg); + return (char*)EncoderBase::encode(stream, Mnemonic_CMPXCHG, args); +} + +ENCODER_DECLARE_EXPORT char * xchg(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz) { + EncoderBase::Operands args; + add_rm(args, rm, sz); + add_r(args, r, sz); + return (char*)EncoderBase::encode(stream, Mnemonic_XCHG, args); +} + +// inc(rement), dec(rement), not, neg(ate) instructions +ENCODER_DECLARE_EXPORT char * inc(char * stream, const RM_Opnd & rm, Opnd_Size sz) { + EncoderBase::Operands args; + add_rm(args, rm, sz); + return (char*)EncoderBase::encode(stream, Mnemonic_INC, args); +} + +ENCODER_DECLARE_EXPORT char * dec(char * stream, const RM_Opnd & rm, Opnd_Size sz) { + EncoderBase::Operands args; + add_rm(args, rm, 
sz); + return (char*)EncoderBase::encode(stream, Mnemonic_DEC, args); +} + +ENCODER_DECLARE_EXPORT char * _not(char * stream, const RM_Opnd & rm, Opnd_Size sz) { + EncoderBase::Operands args; + add_rm(args, rm, sz); + return (char*)EncoderBase::encode(stream, Mnemonic_NOT, args); +} + +ENCODER_DECLARE_EXPORT char * neg(char * stream, const RM_Opnd & rm, Opnd_Size sz) { + EncoderBase::Operands args; + add_rm(args, rm, sz); + return (char*)EncoderBase::encode(stream, Mnemonic_NEG, args); +} + +ENCODER_DECLARE_EXPORT char * nop(char * stream) { + EncoderBase::Operands args; + return (char*)EncoderBase::encode(stream, Mnemonic_NOP, args); +} + +ENCODER_DECLARE_EXPORT char * int3(char * stream) { + EncoderBase::Operands args; + return (char*)EncoderBase::encode(stream, Mnemonic_INT3, args); +} + +// alu instructions: add, or, adc, sbb, and, sub, xor, cmp +ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz) { + EncoderBase::Operands args; + add_rm(args, rm, sz); + add_imm(args, imm); + return (char*)EncoderBase::encode(stream, map_alu(opc), args); +}; + +ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const M_Opnd & m, const R_Opnd & r, Opnd_Size sz) { + EncoderBase::Operands args; + add_rm(args, m, sz); + add_rm(args, r, sz); + return (char*)EncoderBase::encode(stream, map_alu(opc), args); +} + +ENCODER_DECLARE_EXPORT char * alu(char * stream, ALU_Opcode opc, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) { + EncoderBase::Operands args; + add_rm(args, r, sz); + add_rm(args, rm, sz); + return (char*)EncoderBase::encode(stream, map_alu(opc), args); +} + +// test instruction +ENCODER_DECLARE_EXPORT char * test(char * stream, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz) { + EncoderBase::Operands args; + add_rm(args, rm, sz); + assert(imm.get_size() <= sz); + add_imm(args, imm); + return (char*)EncoderBase::encode(stream, Mnemonic_TEST, args); +} + +ENCODER_DECLARE_EXPORT 
char * test(char * stream, const RM_Opnd & rm, const R_Opnd & r, Opnd_Size sz) { + EncoderBase::Operands args; + add_rm(args, rm, sz); + add_r(args, r, sz); + return (char*)EncoderBase::encode(stream, Mnemonic_TEST, args); +} + +// shift instructions: shl, shr, sar, shld, shrd +ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode shc, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz) { + EncoderBase::Operands args; + add_rm(args, rm, sz); + add_imm(args, imm); + return (char*)EncoderBase::encode(stream, map_shift(shc), args); +} + +ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode shc, const RM_Opnd & rm, Opnd_Size sz) { + EncoderBase::Operands args; + add_rm(args, rm, sz); + args.add(RegName_CL); + return (char*)EncoderBase::encode(stream, map_shift(shc), args); +} + +ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode shc, const RM_Opnd & rm, + const R_Opnd & r, const Imm_Opnd & imm, Opnd_Size sz) { + EncoderBase::Operands args; + assert(shc == shld_opc || shc == shrd_opc); + add_rm(args, rm, sz); + add_r(args, r, sz); + add_imm(args, imm); + return (char*)EncoderBase::encode(stream, map_shift(shc), args); +} + +ENCODER_DECLARE_EXPORT char * shift(char * stream, Shift_Opcode shc, const RM_Opnd & rm, + const R_Opnd & r, Opnd_Size sz) { + EncoderBase::Operands args; + assert(shc == shld_opc || shc == shrd_opc); + add_rm(args, rm, sz); + add_r(args, r, sz); + args.add(RegName_CL); + return (char*)EncoderBase::encode(stream, map_shift(shc), args); +} + +// multiply instructions: mul, imul +ENCODER_DECLARE_EXPORT char * mul(char * stream, const RM_Opnd & rm, Opnd_Size sz) { + EncoderBase::Operands args; + args.add(RegName_EDX); + args.add(RegName_EAX); + add_rm(args, rm, sz); + return (char*)EncoderBase::encode(stream, Mnemonic_MUL, args); +} + +ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) { + EncoderBase::Operands args; + add_r(args, r, sz); + add_rm(args, rm, sz); + 
return (char*)EncoderBase::encode(stream, Mnemonic_IMUL, args); +} + +ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const Imm_Opnd & imm, Opnd_Size sz) { + EncoderBase::Operands args; + add_r(args, r, sz); + add_imm(args, imm); + return (char*)EncoderBase::encode(stream, Mnemonic_IMUL, args); +} + +ENCODER_DECLARE_EXPORT char * imul(char * stream, const R_Opnd & r, const RM_Opnd & rm, + const Imm_Opnd & imm, Opnd_Size sz) { + EncoderBase::Operands args; + add_r(args, r, sz); + add_rm(args, rm, sz); + add_imm(args, imm); + return (char*)EncoderBase::encode(stream, Mnemonic_IMUL, args); +} + +// divide instructions: div, idiv +ENCODER_DECLARE_EXPORT char * idiv(char * stream, const RM_Opnd & rm, Opnd_Size sz) { + EncoderBase::Operands args; +#ifdef _EM64T_ + add_r(args, rdx_opnd, sz); + add_r(args, rax_opnd, sz); +#else + add_r(args, edx_opnd, sz); + add_r(args, eax_opnd, sz); +#endif + add_rm(args, rm, sz); + return (char*)EncoderBase::encode(stream, Mnemonic_IDIV, args); +} + +ENCODER_DECLARE_EXPORT char * div(char * stream, const RM_Opnd & rm, Opnd_Size sz) { + EncoderBase::Operands args; +#ifdef _EM64T_ + add_r(args, rdx_opnd, sz); + add_r(args, rax_opnd, sz); +#else + add_r(args, edx_opnd, sz); + add_r(args, eax_opnd, sz); +#endif + add_rm(args, rm, sz); + return (char*)EncoderBase::encode(stream, Mnemonic_DIV, args); +} + +// data movement: mov +ENCODER_DECLARE_EXPORT char * mov(char * stream, const M_Opnd & m, const R_Opnd & r, Opnd_Size sz) { + EncoderBase::Operands args; + add_m(args, m, sz); + add_r(args, r, sz); + return (char*)EncoderBase::encode(stream, Mnemonic_MOV, args); +} + +ENCODER_DECLARE_EXPORT char * mov(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) { + EncoderBase::Operands args; + add_r(args, r, sz); + add_rm(args, rm, sz); + return (char*)EncoderBase::encode(stream, Mnemonic_MOV, args); +} + +ENCODER_DECLARE_EXPORT char * mov(char * stream, const RM_Opnd & rm, const Imm_Opnd & imm, Opnd_Size sz) { + 
EncoderBase::Operands args; + add_rm(args, rm, sz); + add_imm(args, imm); + return (char*)EncoderBase::encode(stream, Mnemonic_MOV, args); +} + +ENCODER_DECLARE_EXPORT char * movd(char * stream, const RM_Opnd & rm, const XMM_Opnd & xmm) { + EncoderBase::Operands args; + add_rm(args, rm, size_32); + add_xmm(args, xmm, false); + return (char*)EncoderBase::encode(stream, Mnemonic_MOVD, args); +} + +ENCODER_DECLARE_EXPORT char * movd(char * stream, const XMM_Opnd & xmm, const RM_Opnd & rm) { + EncoderBase::Operands args; + add_xmm(args, xmm, false); + add_rm(args, rm, size_32); + return (char*)EncoderBase::encode(stream, Mnemonic_MOVD, args); +} + +ENCODER_DECLARE_EXPORT char * movq(char * stream, const RM_Opnd & rm, const XMM_Opnd & xmm) { + EncoderBase::Operands args; + add_rm(args, rm, size_64); + add_xmm(args, xmm, true); + return (char*)EncoderBase::encode(stream, Mnemonic_MOVQ, args); +} + +ENCODER_DECLARE_EXPORT char * movq(char * stream, const XMM_Opnd & xmm, const RM_Opnd & rm) { + EncoderBase::Operands args; + add_xmm(args, xmm, true); + add_rm(args, rm, size_64); + return (char*)EncoderBase::encode(stream, Mnemonic_MOVQ, args); +} + +ENCODER_DECLARE_EXPORT char * movsx(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) { + EncoderBase::Operands args; + add_r(args, r, n_size); + add_rm(args, rm, sz, OpndExt_Signed); + return (char*)EncoderBase::encode(stream, Mnemonic_MOVSX, args); +} + +ENCODER_DECLARE_EXPORT char * movzx(char * stream, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) { + EncoderBase::Operands args; + add_r(args, r, n_size); + // movzx r64, r/m32 is not available on em64t + // mov r32, r/m32 should zero out upper bytes + assert(sz <= size_16); + add_rm(args, rm, sz, OpndExt_Zero); + return (char*)EncoderBase::encode(stream, Mnemonic_MOVZX, args); +} + +// sse mov +ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) { + EncoderBase::Operands args; + add_xmm(args, xmm, 
dbl); + add_m(args, mem, dbl ? size_64 : size_32); + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MOVSD : Mnemonic_MOVSS, args); +} + +ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const M_Opnd & mem, const XMM_Opnd & xmm, bool dbl) { + EncoderBase::Operands args; + add_m(args, mem, dbl ? size_64 : size_32); + add_xmm(args, xmm, dbl); + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MOVSD : Mnemonic_MOVSS, args); +} + +ENCODER_DECLARE_EXPORT char * sse_mov(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) { + EncoderBase::Operands args; + add_xmm(args, xmm0, dbl); + add_xmm(args, xmm1, dbl); + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MOVSD : Mnemonic_MOVSS, args ); +} + +// sse add, sub, mul, div +ENCODER_DECLARE_EXPORT char * sse_add(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) { + EncoderBase::Operands args; + add_xmm(args, xmm, dbl); + add_m(args, mem, dbl ? size_64 : size_32); + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_ADDSD : Mnemonic_ADDSS, args); +} + +ENCODER_DECLARE_EXPORT char * sse_add(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) { + EncoderBase::Operands args; + add_xmm(args, xmm0, dbl); + add_xmm(args, xmm1, dbl); + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_ADDSD : Mnemonic_ADDSS, args); +} + +ENCODER_DECLARE_EXPORT char * sse_sub(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) { + EncoderBase::Operands args; + add_xmm(args, xmm, dbl); + add_m(args, mem, dbl ? size_64 : size_32); + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_SUBSD : Mnemonic_SUBSS, args); +} + +ENCODER_DECLARE_EXPORT char * sse_sub(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) { + EncoderBase::Operands args; + add_xmm(args, xmm0, dbl); + add_xmm(args, xmm1, dbl); + return (char*)EncoderBase::encode(stream, dbl ? 
Mnemonic_SUBSD : Mnemonic_SUBSS, args); +} + +ENCODER_DECLARE_EXPORT char * sse_mul( char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) { + EncoderBase::Operands args; + add_xmm(args, xmm, dbl); + add_m(args, mem, dbl ? size_64 : size_32); + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MULSD : Mnemonic_MULSS, args); +} + +ENCODER_DECLARE_EXPORT char * sse_mul(char * stream, const XMM_Opnd& xmm0, const XMM_Opnd& xmm1, bool dbl) { + EncoderBase::Operands args; + add_xmm(args, xmm0, dbl); + add_xmm(args, xmm1, dbl); + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_MULSD : Mnemonic_MULSS, args); +} + +ENCODER_DECLARE_EXPORT char * sse_div(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) { + EncoderBase::Operands args; + add_xmm(args, xmm, dbl); + add_m(args, mem, dbl ? size_64 : size_32); + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_DIVSD : Mnemonic_DIVSS, args); +} + +ENCODER_DECLARE_EXPORT char * sse_div(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) { + EncoderBase::Operands args; + add_xmm(args, xmm0, dbl); + add_xmm(args, xmm1, dbl); + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_DIVSD : Mnemonic_DIVSS, args); +} + +ENCODER_DECLARE_EXPORT char * sse_xor(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1) { + EncoderBase::Operands args; + add_xmm(args, xmm0, true); + add_xmm(args, xmm1, true); + return (char*)EncoderBase::encode(stream, Mnemonic_PXOR, args); +} + +ENCODER_DECLARE_EXPORT char * sse_compare(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) { + EncoderBase::Operands args; + add_xmm(args, xmm0, true); + add_xmm(args, xmm1, true); + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_COMISD : Mnemonic_COMISS, args); +} + +ENCODER_DECLARE_EXPORT char * sse_compare(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem, bool dbl) { + EncoderBase::Operands args; + add_xmm(args, xmm0, dbl); + add_m(args, mem, dbl ? 
size_64 : size_32); + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_COMISD : Mnemonic_COMISS, args); +} + +// sse conversions +ENCODER_DECLARE_EXPORT char * sse_cvt_si(char * stream, const XMM_Opnd & xmm, const M_Opnd & mem, bool dbl) { + EncoderBase::Operands args; + add_xmm(args, xmm, dbl); + add_m(args, mem, size_32); + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTSI2SD : Mnemonic_CVTSI2SS, args); +} + +ENCODER_DECLARE_EXPORT char * sse_cvtt2si(char * stream, const R_Opnd & reg, const M_Opnd & mem, bool dbl) { + EncoderBase::Operands args; + add_rm(args, reg, size_32); + add_m(args, mem, dbl ? size_64 : size_32); + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTTSD2SI : Mnemonic_CVTTSS2SI, args); +} + +ENCODER_DECLARE_EXPORT char * sse_cvtt2si(char * stream, const R_Opnd & reg, const XMM_Opnd & xmm, bool dbl) { + EncoderBase::Operands args; + add_rm(args, reg, size_32); + add_xmm(args, xmm, dbl); + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTTSD2SI : Mnemonic_CVTTSS2SI, args); +} + +ENCODER_DECLARE_EXPORT char * sse_cvt_fp2dq(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) { + EncoderBase::Operands args; + add_xmm(args, xmm0, dbl); + add_xmm(args, xmm1, dbl); + return (char*)EncoderBase::encode(stream, dbl ? Mnemonic_CVTTPD2DQ : Mnemonic_CVTTPS2DQ, args); +} + +ENCODER_DECLARE_EXPORT char * sse_cvt_dq2fp(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1, bool dbl) { + EncoderBase::Operands args; + add_xmm(args, xmm0, dbl); + add_xmm(args, xmm1, dbl); + return (char*)EncoderBase::encode(stream, dbl ? 
Mnemonic_CVTDQ2PD : Mnemonic_CVTDQ2PS, args); +} + +ENCODER_DECLARE_EXPORT char * sse_d2s(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem64) { + EncoderBase::Operands args; + add_xmm(args, xmm0, false); + add_m(args, mem64, size_64); + return (char*)EncoderBase::encode(stream, Mnemonic_CVTSD2SS, args); +} + +ENCODER_DECLARE_EXPORT char * sse_d2s(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1) { + EncoderBase::Operands args; + add_xmm(args, xmm0, false); + add_xmm(args, xmm1, true); + return (char*)EncoderBase::encode(stream, Mnemonic_CVTSD2SS, args); +} + +ENCODER_DECLARE_EXPORT char * sse_s2d(char * stream, const XMM_Opnd & xmm0, const M_Opnd & mem32) { + EncoderBase::Operands args; + add_xmm(args, xmm0, true); + add_m(args, mem32, size_32); + return (char*)EncoderBase::encode(stream, Mnemonic_CVTSS2SD, args); +} + +ENCODER_DECLARE_EXPORT char * sse_s2d(char * stream, const XMM_Opnd & xmm0, const XMM_Opnd & xmm1) { + EncoderBase::Operands args; + add_xmm(args, xmm0, true); + add_xmm(args, xmm1, false); + return (char*)EncoderBase::encode(stream, Mnemonic_CVTSS2SD, args); +} + +// condition operations +ENCODER_DECLARE_EXPORT char *cmov(char * stream, ConditionCode cc, const R_Opnd & r, const RM_Opnd & rm, Opnd_Size sz) { + EncoderBase::Operands args; + add_r(args, r, sz); + add_rm(args, rm, sz); + return (char*)EncoderBase::encode(stream, (Mnemonic)(Mnemonic_CMOVcc + cc), args); +} + +ENCODER_DECLARE_EXPORT char * setcc(char * stream, ConditionCode cc, const RM_Opnd & rm8) { + EncoderBase::Operands args; + add_rm(args, rm8, size_8); + return (char*)EncoderBase::encode(stream, (Mnemonic)(Mnemonic_SETcc + cc), args); +} + +// load effective address: lea +ENCODER_DECLARE_EXPORT char * lea(char * stream, const R_Opnd & r, const M_Opnd & m, Opnd_Size sz) { + EncoderBase::Operands args; + add_r(args, r, sz); + add_m(args, m, sz); + return (char*)EncoderBase::encode(stream, Mnemonic_LEA, args); +} + +ENCODER_DECLARE_EXPORT char * cdq(char * stream) { 
+ EncoderBase::Operands args; + args.add(RegName_EDX); + args.add(RegName_EAX); + return (char*)EncoderBase::encode(stream, Mnemonic_CDQ, args); +} + +ENCODER_DECLARE_EXPORT char * wait(char * stream) { + return (char*)EncoderBase::encode(stream, Mnemonic_WAIT, EncoderBase::Operands()); +} + +// control-flow instructions + +// loop +ENCODER_DECLARE_EXPORT char * loop(char * stream, const Imm_Opnd & imm) { + EncoderBase::Operands args; + assert(imm.get_size() == size_8); + args.add(RegName_ECX); + add_imm(args, imm); + return (char*)EncoderBase::encode(stream, Mnemonic_LOOP, args); +} + +// jump +ENCODER_DECLARE_EXPORT char * jump8(char * stream, const Imm_Opnd & imm) { + EncoderBase::Operands args; + assert(imm.get_size() == size_8); + add_imm(args, imm); + return (char*)EncoderBase::encode(stream, Mnemonic_JMP, args); +} + +ENCODER_DECLARE_EXPORT char * jump32(char * stream, const Imm_Opnd & imm) { + EncoderBase::Operands args; + assert(imm.get_size() == size_32); + add_imm(args, imm); + return (char*)EncoderBase::encode(stream, Mnemonic_JMP, args); +} + +ENCODER_DECLARE_EXPORT char * jump(char * stream, const RM_Opnd & rm, Opnd_Size sz) { + EncoderBase::Operands args; + add_rm(args, rm, sz); + return (char*)EncoderBase::encode(stream, Mnemonic_JMP, args); +} + +/** + * @note On EM64T: if target lies beyond 2G (does not fit into 32 bit + * offset) then generates indirect jump using RAX (whose content is + * destroyed). 
+ */ +ENCODER_DECLARE_EXPORT char * jump(char * stream, char * target) { +#ifdef _EM64T_ + int64 offset = target - stream; + // sub 2 bytes for the short version + offset -= 2; + if (fit8(offset)) { + // use 8-bit signed relative form + return jump8(stream, Imm_Opnd(size_8, offset)); + } else if (fit32(offset)) { + // sub 5 (3 + 2)bytes for the long version + offset -= 3; + // use 32-bit signed relative form + return jump32(stream, Imm_Opnd(size_32, offset)); + } + // need to use absolute indirect jump + stream = mov(stream, rax_opnd, Imm_Opnd(size_64, (int64)target), size_64); + return jump(stream, rax_opnd, size_64); +#else + I_32 offset = target - stream; + // sub 2 bytes for the short version + offset -= 2; + if (fit8(offset)) { + // use 8-bit signed relative form + return jump8(stream, Imm_Opnd(size_8, offset)); + } + // sub 5 (3 + 2) bytes for the long version + offset -= 3; + // use 32-bit signed relative form + return jump32(stream, Imm_Opnd(size_32, offset)); +#endif +} + +// branch +ENCODER_DECLARE_EXPORT char * branch8(char * stream, ConditionCode cond, + const Imm_Opnd & imm, + InstrPrefix pref) +{ + if (pref != no_prefix) { + assert(pref == hint_branch_taken_prefix || pref == hint_branch_taken_prefix); + stream = prefix(stream, pref); + } + Mnemonic m = (Mnemonic)(Mnemonic_Jcc + cond); + EncoderBase::Operands args; + assert(imm.get_size() == size_8); + add_imm(args, imm); + return (char*)EncoderBase::encode(stream, m, args); +} + +ENCODER_DECLARE_EXPORT char * branch32(char * stream, ConditionCode cond, + const Imm_Opnd & imm, + InstrPrefix pref) +{ + if (pref != no_prefix) { + assert(pref == hint_branch_taken_prefix || pref == hint_branch_taken_prefix); + stream = prefix(stream, pref); + } + Mnemonic m = (Mnemonic)(Mnemonic_Jcc + cond); + EncoderBase::Operands args; + assert(imm.get_size() == size_32); + add_imm(args, imm); + return (char*)EncoderBase::encode(stream, m, args); +} + +/* +ENCODER_DECLARE_EXPORT char * branch(char * stream, ConditionCode 
cc, const char * target, InstrPrefix prefix) { +// sub 2 bytes for the short version +int64 offset = stream-target-2; +if( fit8(offset) ) { +return branch8(stream, cc, Imm_Opnd(size_8, (char)offset), is_signed); +} +return branch32(stream, cc, Imm_Opnd(size_32, (int)offset), is_signed); +} +*/ + +// call +ENCODER_DECLARE_EXPORT char * call(char * stream, const Imm_Opnd & imm) +{ + EncoderBase::Operands args; + add_imm(args, imm); + return (char*)EncoderBase::encode(stream, Mnemonic_CALL, args); +} + +ENCODER_DECLARE_EXPORT char * call(char * stream, const RM_Opnd & rm, + Opnd_Size sz) +{ + EncoderBase::Operands args; + add_rm(args, rm, sz); + return (char*)EncoderBase::encode(stream, Mnemonic_CALL, args); +} + +/** +* @note On EM64T: if target lies beyond 2G (does not fit into 32 bit +* offset) then generates indirect jump using RAX (whose content is +* destroyed). +*/ +ENCODER_DECLARE_EXPORT char * call(char * stream, const char * target) +{ +#ifdef _EM64T_ + int64 offset = target - stream; + if (fit32(offset)) { + offset -= 5; // sub 5 bytes for this instruction + Imm_Opnd imm(size_32, offset); + return call(stream, imm); + } + // need to use absolute indirect call + stream = mov(stream, rax_opnd, Imm_Opnd(size_64, (int64)target), size_64); + return call(stream, rax_opnd, size_64); +#else + I_32 offset = target - stream; + offset -= 5; // sub 5 bytes for this instruction + Imm_Opnd imm(size_32, offset); + return call(stream, imm); +#endif +} + +// return instruction +ENCODER_DECLARE_EXPORT char * ret(char * stream) +{ + EncoderBase::Operands args; + return (char*)EncoderBase::encode(stream, Mnemonic_RET, args); +} + +ENCODER_DECLARE_EXPORT char * ret(char * stream, const Imm_Opnd & imm) +{ + EncoderBase::Operands args; + // TheManual says imm can be 16-bit only + //assert(imm.get_size() <= size_16); + args.add(EncoderBase::Operand(map_size(size_16), imm.get_value())); + return (char*)EncoderBase::encode(stream, Mnemonic_RET, args); +} + +ENCODER_DECLARE_EXPORT 
char * ret(char * stream, unsigned short pop) +{ + // TheManual says it can only be imm16 + EncoderBase::Operands args(EncoderBase::Operand(OpndSize_16, pop, OpndExt_Zero)); + return (char*)EncoderBase::encode(stream, Mnemonic_RET, args); +} + +// floating-point instructions +ENCODER_DECLARE_EXPORT char * fld(char * stream, const M_Opnd & m, + bool is_double) { + EncoderBase::Operands args; + // a fake FP register as operand + add_fp(args, 0, is_double); + add_m(args, m, is_double ? size_64 : size_32); + return (char*)EncoderBase::encode(stream, Mnemonic_FLD, args); +} + +ENCODER_DECLARE_EXPORT char * fist(char * stream, const M_Opnd & mem, + bool is_long, bool pop_stk) +{ + EncoderBase::Operands args; + if (pop_stk) { + add_m(args, mem, is_long ? size_64 : size_32); + // a fake FP register as operand + add_fp(args, 0, is_long); + return (char*)EncoderBase::encode(stream, Mnemonic_FISTP, args); + } + // only 32-bit operands are supported + assert(is_long == false); + add_m(args, mem, size_32); + add_fp(args, 0, false); + return (char*)EncoderBase::encode(stream, Mnemonic_FIST, args); +} + +ENCODER_DECLARE_EXPORT char * fst(char * stream, const M_Opnd & m, + bool is_double, bool pop_stk) +{ + EncoderBase::Operands args; + add_m(args, m, is_double ? size_64 : size_32); + // a fake FP register as operand + add_fp(args, 0, is_double); + return (char*)EncoderBase::encode(stream, + pop_stk ? Mnemonic_FSTP : Mnemonic_FST, + args); +} + +ENCODER_DECLARE_EXPORT char * fst(char * stream, unsigned i, bool pop_stk) +{ + EncoderBase::Operands args; + add_fp(args, i, true); + return (char*)EncoderBase::encode(stream, + pop_stk ? 
Mnemonic_FSTP : Mnemonic_FST, + args); +} + +ENCODER_DECLARE_EXPORT char * fldcw(char * stream, const M_Opnd & mem) { + EncoderBase::Operands args; + add_m(args, mem, size_16); + return (char*)EncoderBase::encode(stream, Mnemonic_FLDCW, args); +} + +ENCODER_DECLARE_EXPORT char * fnstcw(char * stream, const M_Opnd & mem) { + EncoderBase::Operands args; + add_m(args, mem, size_16); + return (char*)EncoderBase::encode(stream, Mnemonic_FNSTCW, args); +} + +ENCODER_DECLARE_EXPORT char * fnstsw(char * stream) +{ + return (char*)EncoderBase::encode(stream, Mnemonic_FNSTCW, + EncoderBase::Operands()); +} + +// string operations +ENCODER_DECLARE_EXPORT char * set_d(char * stream, bool set) { + EncoderBase::Operands args; + return (char*)EncoderBase::encode(stream, + set ? Mnemonic_STD : Mnemonic_CLD, + args); +} + +ENCODER_DECLARE_EXPORT char * scas(char * stream, unsigned char prefix) +{ + EncoderBase::Operands args; + if (prefix != no_prefix) { + assert(prefix == prefix_repnz || prefix == prefix_repz); + *stream = prefix; + ++stream; + } + return (char*)EncoderBase::encode(stream, Mnemonic_SCAS, args); +} + +ENCODER_DECLARE_EXPORT char * stos(char * stream, unsigned char prefix) +{ + if (prefix != no_prefix) { + assert(prefix == prefix_rep); + *stream = prefix; + ++stream; + } + + EncoderBase::Operands args; + return (char*)EncoderBase::encode(stream, Mnemonic_STOS, args); +} + +// Intrinsic FP math functions + +ENCODER_DECLARE_EXPORT char * fprem(char * stream) { + return (char*)EncoderBase::encode(stream, Mnemonic_FPREM, + EncoderBase::Operands()); +} + +ENCODER_DECLARE_EXPORT char * fprem1(char * stream) { + return (char*)EncoderBase::encode(stream, Mnemonic_FPREM1, + EncoderBase::Operands()); +} diff --git a/libpixelflinger/codeflinger/x86/load_store.cpp b/libpixelflinger/codeflinger/x86/load_store.cpp new file mode 100644 index 0000000..a427411 --- /dev/null +++ b/libpixelflinger/codeflinger/x86/load_store.cpp @@ -0,0 +1,458 @@ +/* 
libs/pixelflinger/codeflinger/x86/load_store.cpp +** +** Copyright 2006, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +*/ + +#include <assert.h> +#include <stdio.h> +#include <cutils/log.h> + +#include "codeflinger/x86/GGLX86Assembler.h" + +namespace android { + +// ---------------------------------------------------------------------------- + +void GGLX86Assembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags) +{ + const int bits = addr.size; + const int inc = (flags & WRITE_BACK)?1:0; + switch (bits) { + case 32: + if (inc) { + MOV_REG_TO_MEM(s.reg, 0, addr.reg); + ADD_IMM_TO_REG(4, addr.reg); + } else { + MOV_REG_TO_MEM(s.reg, 0, addr.reg); + } + break; + case 24: + // 24 bits formats are a little special and used only for RGB + // 0x00BBGGRR is unpacked as R,G,B + MOV_REG_TO_MEM(s.reg, 0, addr.reg, OpndSize_8); + ROR(8, s.reg); + MOV_REG_TO_MEM(s.reg, 1, addr.reg, OpndSize_8); + ROR(8, s.reg); + MOV_REG_TO_MEM(s.reg, 2, addr.reg, OpndSize_8); + if (!(s.flags & CORRUPTIBLE)) { + ROR(16, s.reg); + } + if (inc) { + ADD_IMM_TO_REG(3, addr.reg); + } + break; + case 16: + if (inc) { + MOV_REG_TO_MEM(s.reg, 0, addr.reg,OpndSize_16); + ADD_IMM_TO_REG(2, addr.reg); + } else { + MOV_REG_TO_MEM(s.reg, 0, addr.reg,OpndSize_16); + } + break; + case 8: + if (inc) { + MOV_REG_TO_MEM(s.reg, 0, addr.reg,OpndSize_8); + ADD_IMM_TO_REG(1, addr.reg); + } else { + MOV_REG_TO_MEM(s.reg, 0, addr.reg,OpndSize_8); + } + break; 
+ } +} + +void GGLX86Assembler::load(pointer_t& addr, const pixel_t& s, uint32_t flags) +{ + Scratch scratches(registerFile()); + int s0; + + const int bits = addr.size; + // WRITE_BACK indicates that the base register will also be updated after loading the data + const int inc = (flags & WRITE_BACK)?1:0; + switch (bits) { + case 32: + if (inc) { + MOV_MEM_TO_REG(0, addr.reg, s.reg); + ADD_IMM_TO_REG(4, addr.reg); + + } else MOV_MEM_TO_REG(0, addr.reg, s.reg); + break; + case 24: + // 24 bits formats are a little special and used only for RGB + // R,G,B is packed as 0x00BBGGRR + s0 = scratches.obtain(); + if (s.reg != addr.reg) { + MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 0, s.reg); //R + MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 1, s0); //G + SHL(8, s0); + OR_REG_TO_REG(s0, s.reg); + MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 2, s0); //B + SHL(16, s0); + OR_REG_TO_REG(s0, s.reg); + } else { + int s1 = scratches.obtain(); + MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 0, s1); //R + MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 1, s0); //G + SHL(8, s0); + OR_REG_TO_REG(s0, s1); + MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 2, s0); //B + SHL(16, s0); + OR_REG_TO_REG(s0, s1); + MOV_REG_TO_REG(s1, s.reg); + scratches.recycle(s1); + + } + scratches.recycle(s0); + if (inc) + ADD_IMM_TO_REG(3, addr.reg); + break; + case 16: + if (inc) { + MOVZX_MEM_TO_REG(OpndSize_16, addr.reg, 0, s.reg); + ADD_IMM_TO_REG(2, addr.reg); + } + else MOVZX_MEM_TO_REG(OpndSize_16, addr.reg, 0, s.reg); + break; + case 8: + if (inc) { + MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 0, s.reg); + ADD_IMM_TO_REG(1, addr.reg); + } + else MOVZX_MEM_TO_REG(OpndSize_8, addr.reg, 0, s.reg); + break; + } + if (inc) MOV_REG_TO_MEM(addr.reg, addr.offset_ebp, PhysicalReg_EBP); +} + +void GGLX86Assembler::extract(integer_t& d, int s, int h, int l, int bits) +{ + const int maskLen = h-l; + + assert(maskLen<=8); + assert(h); + + + if (h != bits) { + const int mask = ((1<<maskLen)-1) << l; + MOV_REG_TO_REG(s, d.reg); + AND_IMM_TO_REG(mask, 
d.reg);// component = packed & mask; + s = d.reg; + } + + if (l) { + MOV_REG_TO_REG(s, d.reg); + SHR(l, d.reg);// component = packed >> l; + s = d.reg; + } + + if (s != d.reg) { + MOV_REG_TO_REG(s, d.reg); + } + + d.s = maskLen; +} + +void GGLX86Assembler::extract(integer_t& d, const pixel_t& s, int component) +{ + extract(d, s.reg, + s.format.c[component].h, + s.format.c[component].l, + s.size()); +} + +void GGLX86Assembler::extract(component_t& d, const pixel_t& s, int component) +{ + integer_t r(d.reg, 32, d.flags, d.offset_ebp); + extract(r, s.reg, + s.format.c[component].h, + s.format.c[component].l, + s.size()); + d = component_t(r); +} + + +void GGLX86Assembler::expand(integer_t& d, const component_t& s, int dbits) +{ + if (s.l || (s.flags & CLEAR_HI)) { + extract(d, s.reg, s.h, s.l, 32); + expand(d, d, dbits); + } else { + expand(d, integer_t(s.reg, s.size(), s.flags, s.offset_ebp), dbits); + } +} + +void GGLX86Assembler::expand(component_t& d, const component_t& s, int dbits) +{ + integer_t r(d.reg, 32, d.flags, d.offset_ebp); + expand(r, s, dbits); + d = component_t(r); +} + +void GGLX86Assembler::expand(integer_t& dst, const integer_t& src, int dbits) +{ + assert(src.size()); + + Scratch scratches(registerFile()); + int sbits = src.size(); + int s = src.reg; + int d = dst.reg; + + // be sure to set 'dst' after we read 'src' as they may be identical + dst.s = dbits; + dst.flags = 0; + + if (dbits<=sbits) { + if (s != d) { + MOV_REG_TO_REG(s, d); + } + return; + } + + if (sbits == 1) { + MOV_REG_TO_REG(s, d); + SHL(dbits, d); + SUB_REG_TO_REG(s, d); + // d = (s<<dbits) - s; + return; + } + + if (dbits % sbits) { + MOV_REG_TO_REG(s, d); + SHL(dbits-sbits, d); + // d = s << (dbits-sbits); + dbits -= sbits; + int temp = scratches.obtain(); + do { + MOV_REG_TO_REG(d, temp); + SHR(sbits, temp); + OR_REG_TO_REG(temp, d); + // d |= d >> sbits; + dbits -= sbits; + sbits *= 2; + } while(dbits>0); + return; + } + + dbits -= sbits; + do { + MOV_REG_TO_REG(s, d); + 
SHL(sbits, d); + OR_REG_TO_REG(s, d); + // d |= d<<sbits; + s = d; + dbits -= sbits; + if (sbits*2 < dbits) { + sbits *= 2; + } + } while(dbits>0); +} + +void GGLX86Assembler::downshift( + pixel_t& d, int component, component_t s, reg_t& dither) +{ + const needs_t& needs = mBuilderContext.needs; + Scratch scratches(registerFile()); + // s(temp) is loaded in build_blending + s.reg = scratches.obtain(); + MOV_MEM_TO_REG(s.offset_ebp, EBP, s.reg); + + int sh = s.h; + int sl = s.l; + int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0; + int maskLoBits = (sl!=0) ? ((s.flags & CLEAR_LO)?1:0) : 0; + int sbits = sh - sl; + + int dh = d.format.c[component].h; + int dl = d.format.c[component].l; + int dbits = dh - dl; + int dithering = 0; + + ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits); + + if (sbits>dbits) { + // see if we need to dither + dithering = mDithering; + } + + int ireg = d.reg; + if (!(d.flags & FIRST)) { + if (s.flags & CORRUPTIBLE) { + ireg = s.reg; + } else { + ireg = scratches.obtain(); + } + } + d.flags &= ~FIRST; + + if (maskHiBits) { + // we need to mask the high bits (and possibly the lowbits too) + // and we might be able to use immediate mask. 
+ if (!dithering) { + // we don't do this if we only have maskLoBits because we can + // do it more efficiently below (in the case where dl=0) + const int offset = sh - dbits; + if (dbits<=8 && offset >= 0) { + const uint32_t mask = ((1<<dbits)-1) << offset; + build_and_immediate(ireg, s.reg, mask, 32); + s.reg = ireg; + sl = offset; + sbits = dbits; + maskLoBits = maskHiBits = 0; + } + } else { + // in the dithering case though, we need to preserve the lower bits + const uint32_t mask = ((1<<sbits)-1) << sl; + build_and_immediate(ireg, s.reg, mask, 32); + s.reg = ireg; + maskLoBits = maskHiBits = 0; + } + } + + // XXX: we could special case (maskHiBits & !maskLoBits) + // like we do for maskLoBits below, but it happens very rarely + // that we have maskHiBits only and the conditions necessary to lead + // to better code (like doing d |= s << 24) + + if (maskHiBits) { + MOV_REG_TO_REG(s.reg, ireg); + SHL(32-sh, ireg); + sl += 32-sh; + sh = 32; + s.reg = ireg; + maskHiBits = 0; + } + + // Downsampling should be performed as follows: + // V * ((1<<dbits)-1) / ((1<<sbits)-1) + // V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)] + // V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)] + // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits + // V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits)) + // + // By approximating (1>>dbits) and (1>>sbits) to 0: + // + // V>>(sbits-dbits) - V>>sbits + // + // A good approximation is V>>(sbits-dbits), + // but better one (needed for dithering) is: + // + // (V>>(sbits-dbits)<<sbits - V)>>sbits + // (V<<dbits - V)>>sbits + // (V - V>>dbits)>>(sbits-dbits) + + // Dithering is done here + if (dithering) { + comment("dithering"); + if (sl) { + MOV_REG_TO_REG(s.reg, ireg); + SHR(sl, ireg); + sh -= sl; + sl = 0; + s.reg = ireg; + } + // scaling (V-V>>dbits) + int temp_reg = scratches.obtain(); + MOV_REG_TO_REG(s.reg, temp_reg); + SHR(dbits, temp_reg); + MOV_REG_TO_REG(s.reg, ireg); + SUB_REG_TO_REG(temp_reg, 
ireg); + scratches.recycle(temp_reg); + const int shift = (GGL_DITHER_BITS - (sbits-dbits)); + dither.reg = scratches.obtain(); + MOV_MEM_TO_REG(dither.offset_ebp, EBP, dither.reg); + if (shift>0) { + temp_reg = scratches.obtain(); + MOV_REG_TO_REG(dither.reg, temp_reg); + SHR(shift, temp_reg); + ADD_REG_TO_REG(temp_reg, ireg); + scratches.recycle(temp_reg); + } + else if (shift<0) { + temp_reg = scratches.obtain(); + MOV_REG_TO_REG(dither.reg, temp_reg); + SHL(-shift, temp_reg); + ADD_REG_TO_REG(temp_reg, ireg); + scratches.recycle(temp_reg); + } + else { + ADD_REG_TO_REG(dither.reg, ireg); + } + scratches.recycle(dither.reg); + s.reg = ireg; + } + + if ((maskLoBits|dithering) && (sh > dbits)) { + int shift = sh-dbits; + if (dl) { + MOV_REG_TO_REG(s.reg, ireg); + SHR(shift, ireg); + if (ireg == d.reg) { + MOV_REG_TO_REG(ireg, d.reg); + SHL(dl, d.reg); + } else { + int temp_reg = scratches.obtain(); + MOV_REG_TO_REG(ireg, temp_reg); + SHL(dl, temp_reg); + OR_REG_TO_REG(temp_reg, d.reg); + scratches.recycle(temp_reg); + } + } else { + if (ireg == d.reg) { + MOV_REG_TO_REG(s.reg, d.reg); + SHR(shift, d.reg); + } else { + int temp_reg = scratches.obtain(); + MOV_REG_TO_REG(s.reg, temp_reg); + SHR(shift, temp_reg); + OR_REG_TO_REG(temp_reg, d.reg); + scratches.recycle(temp_reg); + } + } + } else { + int shift = sh-dh; + if (shift>0) { + if (ireg == d.reg) { + MOV_REG_TO_REG(s.reg, d.reg); + SHR(shift, d.reg); + } else { + int temp_reg = scratches.obtain(); + MOV_REG_TO_REG(s.reg, temp_reg); + SHR(shift, temp_reg); + OR_REG_TO_REG(temp_reg, d.reg); + scratches.recycle(temp_reg); + } + } else if (shift<0) { + if (ireg == d.reg) { + MOV_REG_TO_REG(s.reg, d.reg); + SHL(-shift, d.reg); + } else { + int temp_reg = scratches.obtain(); + MOV_REG_TO_REG(s.reg, temp_reg); + SHL(-shift, temp_reg); + OR_REG_TO_REG(temp_reg, d.reg); + scratches.recycle(temp_reg); + } + } else { + if (ireg == d.reg) { + if (s.reg != d.reg) { + MOV_REG_TO_REG(s.reg, d.reg); + } + } else { + 
OR_REG_TO_REG(s.reg, d.reg); + } + } + } +} + +}; // namespace android diff --git a/libpixelflinger/codeflinger/x86/texturing.cpp b/libpixelflinger/codeflinger/x86/texturing.cpp new file mode 100644 index 0000000..c02f12b --- /dev/null +++ b/libpixelflinger/codeflinger/x86/texturing.cpp @@ -0,0 +1,1799 @@ +/* libs/pixelflinger/codeflinger/x86/texturing.cpp +** +** Copyright 2006, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +*/ + +#include <assert.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/types.h> + +#include <cutils/log.h> + +#include "codeflinger/x86/GGLX86Assembler.h" + + +namespace android { + +// --------------------------------------------------------------------------- + +// iterators are initialized like this: +// (intToFixedCenter(x) * dx)>>16 + x0 +// ((x<<16 + 0x8000) * dx)>>16 + x0 +// ((x<<16)*dx + (0x8000*dx))>>16 + x0 +// ( (x*dx) + dx>>1 ) + x0 +// (x*dx) + (dx>>1 + x0) + +void GGLX86Assembler::init_iterated_color(fragment_parts_t& parts, const reg_t& x) +{ + context_t const* c = mBuilderContext.c; + const needs_t& needs = mBuilderContext.needs; + int temp_reg; + + if (mSmooth) { + // NOTE: we could take this case in the mDithering + !mSmooth case, + // but this would use up to 4 more registers for the color components + // for only a little added quality. 
+ // Currently, this causes the system to run out of registers in + // some case (see issue #719496) + + comment("compute initial iterated color (smooth and/or dither case)"); + + parts.iterated_packed = 0; + parts.packed = 0; + + // 0x1: color component + // 0x2: iterators + //parts.reload = 3; + const int optReload = mOptLevel >> 1; + if (optReload >= 3) parts.reload = 0; // reload nothing + else if (optReload == 2) parts.reload = 2; // reload iterators + else if (optReload == 1) parts.reload = 1; // reload colors + else if (optReload <= 0) parts.reload = 3; // reload both + + if (!mSmooth) { + // we're not smoothing (just dithering), we never have to + // reload the iterators + parts.reload &= ~2; + } + + Scratch scratches(registerFile()); + const int t0 = (parts.reload & 1) ? scratches.obtain() : 0; + const int t1 = (parts.reload & 2) ? scratches.obtain() : 0; + for (int i=0 ; i<4 ; i++) { + if (!mInfo[i].iterated) + continue; + // this component exists in the destination and is not replaced + // by a texture unit. + const int c = (parts.reload & 1) ? t0 : obtainReg(); + if (i==0) CONTEXT_LOAD(c, iterators.ydady); + if (i==1) CONTEXT_LOAD(c, iterators.ydrdy); + if (i==2) CONTEXT_LOAD(c, iterators.ydgdy); + if (i==3) CONTEXT_LOAD(c, iterators.ydbdy); + parts.argb[i].reg = c; + + if (mInfo[i].smooth) { + parts.argb_dx[i].reg = (parts.reload & 2) ? t1 : obtainReg(); + const int dvdx = parts.argb_dx[i].reg; + temp_reg = scratches.obtain(); + CONTEXT_LOAD(dvdx, generated_vars.argb[i].dx); + MOV_REG_TO_REG(dvdx, temp_reg); + IMUL(x.reg, temp_reg); + ADD_REG_TO_REG(temp_reg, c); + scratches.recycle(temp_reg); + + // adjust the color iterator to make sure it won't overflow + if (!mAA) { + // this is not needed when we're using anti-aliasing + // because we will (have to) clamp the components + // anyway. 
+ int end = scratches.obtain(); + MOV_MEM_TO_REG(parts.count.offset_ebp, PhysicalReg_EBP, end); + SHR(16, end); + IMUL(end, dvdx); + temp_reg = end; + // c - (dvdx*end + c) = -(dvdx*end) + MOV_REG_TO_REG(dvdx, temp_reg); + NEG(temp_reg); + ADD_REG_TO_REG(c, dvdx); + CMOV_REG_TO_REG(Mnemonic_CMOVS, temp_reg, c); + /* + SUB_REG_TO_REG(dvdx, temp_reg); + switch(i) { + case 0: + JCC(Mnemonic_JNS, "1f_init_iterated_color"); + SUB_REG_TO_REG(dvdx, c); + label("1f_init_iterated_color"); + break; + case 1: + JCC(Mnemonic_JNS, "2f_init_iterated_color"); + SUB_REG_TO_REG(dvdx, c); + label("2f_init_iterated_color"); + break; + case 2: + JCC(Mnemonic_JNS, "3f_init_iterated_color"); + SUB_REG_TO_REG(dvdx, c); + label("3f_init_iterated_color"); + break; + case 3: + JCC(Mnemonic_JNS, "4f_init_iterated_color"); + SUB_REG_TO_REG(dvdx, c); + label("4f_init_iterated_color"); + break; + } + */ + + MOV_REG_TO_REG(c, temp_reg); + SAR(31, temp_reg); + NOT(temp_reg); + AND_REG_TO_REG(temp_reg, c); + scratches.recycle(end); + } + if(parts.reload & 2) + scratches.recycle(dvdx); + else + recycleReg(dvdx); + } + CONTEXT_STORE(c, generated_vars.argb[i].c); + if(parts.reload & 1) + scratches.recycle(parts.argb[i].reg); + else + recycleReg(parts.argb[i].reg); + + parts.argb[i].reg = -1; + //if (parts.reload & 1) { + // //MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx); + //} + } + } else { + // We're not smoothed, so we can + // just use a packed version of the color and extract the + // components as needed (or not at all if we don't blend) + + // figure out if we need the iterated color + int load = 0; + for (int i=0 ; i<4 ; i++) { + component_info_t& info = mInfo[i]; + if ((info.inDest || info.needed) && !info.replaced) + load |= 1; + } + + parts.iterated_packed = 1; + parts.packed = (!mTextureMachine.mask && !mBlending + && !mFog && !mDithering); + parts.reload = 0; + if (load || parts.packed) { + if (mBlending || mDithering || mInfo[GGLFormat::ALPHA].needed) { + comment("load 
initial iterated color (8888 packed)"); + parts.iterated.setTo(obtainReg(), + &(c->formats[GGL_PIXEL_FORMAT_RGBA_8888])); + CONTEXT_LOAD(parts.iterated.reg, packed8888); + } else { + comment("load initial iterated color (dest format packed)"); + + parts.iterated.setTo(obtainReg(), &mCbFormat); + + // pre-mask the iterated color + const int bits = parts.iterated.size(); + const uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1; + uint32_t mask = 0; + if (mMasking) { + for (int i=0 ; i<4 ; i++) { + const int component_mask = 1<<i; + const int h = parts.iterated.format.c[i].h; + const int l = parts.iterated.format.c[i].l; + if (h && (!(mMasking & component_mask))) { + mask |= ((1<<(h-l))-1) << l; + } + } + } + + if (mMasking && ((mask & size)==0)) { + // none of the components are present in the mask + } else { + CONTEXT_LOAD(parts.iterated.reg, packed); + if (mCbFormat.size == 1) { + int imm = 0xFF; + AND_IMM_TO_REG(imm, parts.iterated.reg); + } else if (mCbFormat.size == 2) { + SHR(16, parts.iterated.reg); + } + } + + // pre-mask the iterated color + if (mMasking) { + //AND_IMM_TO_REG(mask, parts.iterated.reg); + build_and_immediate(parts.iterated.reg, parts.iterated.reg, + mask, bits); + } + } + mCurSp = mCurSp - 4; + parts.iterated.offset_ebp = mCurSp; + MOV_REG_TO_MEM(parts.iterated.reg, parts.iterated.offset_ebp, EBP); + //PUSH(parts.iterated.reg); + recycleReg(parts.iterated.reg); + parts.iterated.reg=-1; + } + } +} + +void GGLX86Assembler::build_iterated_color( + component_t& fragment, + fragment_parts_t& parts, + int component, + Scratch& regs) +{ + + if (!mInfo[component].iterated) + return; + + if (parts.iterated_packed) { + // iterated colors are packed, extract the one we need + parts.iterated.reg = regs.obtain(); + MOV_MEM_TO_REG(parts.iterated.offset_ebp, EBP, parts.iterated.reg); + extract(fragment, parts.iterated, component); + regs.recycle(parts.iterated.reg); + } else { + fragment.h = GGL_COLOR_BITS; + fragment.l = GGL_COLOR_BITS - 8; + 
fragment.flags |= CLEAR_LO; + // iterated colors are held in their own register, + // (smooth and/or dithering case) + Scratch scratches(registerFile()); + mBuilderContext.Rctx = scratches.obtain(); + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); + if (parts.reload==3) { + // this implies mSmooth + int dx = scratches.obtain(); + CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c); + CONTEXT_LOAD(dx, generated_vars.argb[component].dx); + ADD_REG_TO_REG(fragment.reg, dx); + CONTEXT_STORE(dx, generated_vars.argb[component].c); + scratches.recycle(dx); + } else if (parts.reload & 1) { + //MOV_MEM_TO_REG(parts.argb[component].offset_ebp, EBP, fragment.reg); + CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c); + } else { + // we don't reload, so simply rename the register and mark as + // non CORRUPTIBLE so that the texture env or blending code + // won't modify this (renamed) register + //regs.recycle(fragment.reg); + //MOV_MEM_TO_REG(parts.argb[component].offset_ebp, EBP, fragment.reg); + // it will also be used in build_smooth_shade + CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c); + //fragment.reg = parts.argb[component].reg; + //fragment.flags &= ~CORRUPTIBLE; + } + scratches.recycle(mBuilderContext.Rctx); + if (mInfo[component].smooth && mAA) { + // when using smooth shading AND anti-aliasing, we need to clamp + // the iterators because there is always an extra pixel on the + // edges, which most of the time will cause an overflow + // (since technically its outside of the domain). + int temp = scratches.obtain(); + MOV_REG_TO_REG(fragment.reg, temp); + SAR(31, temp); + NOT(temp); + OR_REG_TO_REG(temp, fragment.reg); + component_sat(fragment, temp); + scratches.recycle(temp); + } + } +} + +// --------------------------------------------------------------------------- + +void GGLX86Assembler::decodeLogicOpNeeds(const needs_t& needs) +{ + // gather some informations about the components we need to process... 
+ const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR; + switch(opcode) { + case GGL_COPY: + mLogicOp = 0; + break; + case GGL_CLEAR: + case GGL_SET: + mLogicOp = LOGIC_OP; + break; + case GGL_AND: + case GGL_AND_REVERSE: + case GGL_AND_INVERTED: + case GGL_XOR: + case GGL_OR: + case GGL_NOR: + case GGL_EQUIV: + case GGL_OR_REVERSE: + case GGL_OR_INVERTED: + case GGL_NAND: + mLogicOp = LOGIC_OP|LOGIC_OP_SRC|LOGIC_OP_DST; + break; + case GGL_NOOP: + case GGL_INVERT: + mLogicOp = LOGIC_OP|LOGIC_OP_DST; + break; + case GGL_COPY_INVERTED: + mLogicOp = LOGIC_OP|LOGIC_OP_SRC; + break; + }; +} + +void GGLX86Assembler::decodeTMUNeeds(const needs_t& needs, context_t const* c) +{ + uint8_t replaced=0; + mTextureMachine.mask = 0; + mTextureMachine.activeUnits = 0; + for (int i=GGL_TEXTURE_UNIT_COUNT-1 ; i>=0 ; i--) { + texture_unit_t& tmu = mTextureMachine.tmu[i]; + if (replaced == 0xF) { + // all components are replaced, skip this TMU. + tmu.format_idx = 0; + tmu.mask = 0; + tmu.replaced = replaced; + continue; + } + tmu.format_idx = GGL_READ_NEEDS(T_FORMAT, needs.t[i]); + tmu.format = c->formats[tmu.format_idx]; + tmu.bits = tmu.format.size*8; + tmu.swrap = GGL_READ_NEEDS(T_S_WRAP, needs.t[i]); + tmu.twrap = GGL_READ_NEEDS(T_T_WRAP, needs.t[i]); + tmu.env = ggl_needs_to_env(GGL_READ_NEEDS(T_ENV, needs.t[i])); + tmu.pot = GGL_READ_NEEDS(T_POT, needs.t[i]); + tmu.linear = GGL_READ_NEEDS(T_LINEAR, needs.t[i]) + && tmu.format.size!=3; // XXX: only 8, 16 and 32 modes for now + + // 5551 linear filtering is not supported + if (tmu.format_idx == GGL_PIXEL_FORMAT_RGBA_5551) + tmu.linear = 0; + + tmu.mask = 0; + tmu.replaced = replaced; + + if (tmu.format_idx) { + mTextureMachine.activeUnits++; + if (tmu.format.c[0].h) tmu.mask |= 0x1; + if (tmu.format.c[1].h) tmu.mask |= 0x2; + if (tmu.format.c[2].h) tmu.mask |= 0x4; + if (tmu.format.c[3].h) tmu.mask |= 0x8; + if (tmu.env == GGL_REPLACE) { + replaced |= tmu.mask; + } else if (tmu.env == GGL_DECAL) { + if 
(!tmu.format.c[GGLFormat::ALPHA].h) { + // if we don't have alpha, decal does nothing + tmu.mask = 0; + } else { + // decal always ignores At + tmu.mask &= ~(1<<GGLFormat::ALPHA); + } + } + } + mTextureMachine.mask |= tmu.mask; + ////printf("%d: mask=%08lx, replaced=%08lx\n", + // i, int(tmu.mask), int(tmu.replaced)); + } + mTextureMachine.replaced = replaced; + mTextureMachine.directTexture = 0; + ////printf("replaced=%08lx\n", mTextureMachine.replaced); +} + + +void GGLX86Assembler::init_textures( + tex_coord_t* coords, + const reg_t& x, const reg_t& y) +{ + context_t const* c = mBuilderContext.c; + const needs_t& needs = mBuilderContext.needs; + reg_t temp_reg_t; + int Rx = x.reg; + int Ry = y.reg; + + if (mTextureMachine.mask) { + comment("compute texture coordinates"); + } + + // init texture coordinates for each tmu + const int cb_format_idx = GGL_READ_NEEDS(CB_FORMAT, needs.n); + const bool multiTexture = mTextureMachine.activeUnits > 1; + for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) { + const texture_unit_t& tmu = mTextureMachine.tmu[i]; + if (tmu.format_idx == 0) + continue; + if ((tmu.swrap == GGL_NEEDS_WRAP_11) && + (tmu.twrap == GGL_NEEDS_WRAP_11)) + { + Scratch scratches(registerFile()); + // 1:1 texture + pointer_t& txPtr = coords[i].ptr; + txPtr.setTo(obtainReg(), tmu.bits); + CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydsdy); + SAR(16, txPtr.reg); + ADD_REG_TO_REG(txPtr.reg, Rx); + CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydtdy); + SAR(16, txPtr.reg); + ADD_REG_TO_REG(txPtr.reg, Ry); + // Rx and Ry are changed + // Rx = Rx + ti.iterators.ydsdy>>16 + // Ry = Ry + ti.iterators.ydtdy>>16 + // Rx = Ry * ti.stide + Rx + + // merge base & offset + CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].stride); + IMUL(Ry, txPtr.reg); + ADD_REG_TO_REG(txPtr.reg, Rx); + + CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data); + temp_reg_t.setTo(Rx); + base_offset(txPtr, txPtr, temp_reg_t); + //PUSH(txPtr.reg); + mCurSp = mCurSp - 4; + 
txPtr.offset_ebp = mCurSp; //ebx, esi, edi, parts.count.reg, parts.cbPtr.reg, parts.z.reg + MOV_REG_TO_MEM(txPtr.reg, txPtr.offset_ebp, EBP); + recycleReg(txPtr.reg); + txPtr.reg=-1; + } else { + Scratch scratches(registerFile()); + reg_t& s = coords[i].s; + reg_t& t = coords[i].t; + // s = (x * dsdx)>>16 + ydsdy + // s = (x * dsdx)>>16 + (y*dsdy)>>16 + s0 + // t = (x * dtdx)>>16 + ydtdy + // t = (x * dtdx)>>16 + (y*dtdy)>>16 + t0 + const int need_w = GGL_READ_NEEDS(W, needs.n); + MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx); + if (need_w) { + s.setTo(obtainReg()); + t.setTo(obtainReg()); + CONTEXT_LOAD(s.reg, state.texture[i].iterators.ydsdy); + CONTEXT_LOAD(t.reg, state.texture[i].iterators.ydtdy); + CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]); + CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]); + recycleReg(s.reg); + recycleReg(t.reg); + } else { + int ydsdy = scratches.obtain(); + int dsdx = scratches.obtain(); + CONTEXT_LOAD(ydsdy, state.texture[i].iterators.ydsdy); + CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx); + IMUL(Rx, dsdx); + ADD_REG_TO_REG(dsdx, ydsdy); + CONTEXT_STORE(ydsdy, generated_vars.texture[i].spill[0]); + scratches.recycle(ydsdy); + scratches.recycle(dsdx); + + int ydtdy = scratches.obtain(); + int dtdx = scratches.obtain(); + CONTEXT_LOAD(ydtdy, state.texture[i].iterators.ydtdy); + CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx); + IMUL(Rx, dtdx); + ADD_REG_TO_REG(dtdx, ydtdy); + CONTEXT_STORE(ydtdy, generated_vars.texture[i].spill[1]); + scratches.recycle(ydtdy); + scratches.recycle(dtdx); + + // s.reg = Rx * ti.dsdx + ydsdy + // t.reg = Rx * ti.dtdx + ydtdy + } + } + + // direct texture? 
+ if (!multiTexture && !mBlending && !mDithering && !mFog && + cb_format_idx == tmu.format_idx && !tmu.linear && + mTextureMachine.replaced == tmu.mask) + { + mTextureMachine.directTexture = i + 1; + } + } +} + +void GGLX86Assembler::build_textures( fragment_parts_t& parts, + Scratch& regs) +{ + context_t const* c = mBuilderContext.c; + const needs_t& needs = mBuilderContext.needs; + reg_t temp_reg_t; + //int Rctx = mBuilderContext.Rctx; + + + const bool multiTexture = mTextureMachine.activeUnits > 1; + for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) { + const texture_unit_t& tmu = mTextureMachine.tmu[i]; + if (tmu.format_idx == 0) + continue; + + pointer_t& txPtr = parts.coords[i].ptr; + pixel_t& texel = parts.texel[i]; + + // repeat... + if ((tmu.swrap == GGL_NEEDS_WRAP_11) && + (tmu.twrap == GGL_NEEDS_WRAP_11)) + { // 1:1 textures + comment("fetch texel"); + texel.setTo(regs.obtain(), &tmu.format); + txPtr.reg = regs.obtain(); + MOV_MEM_TO_REG(txPtr.offset_ebp, EBP, txPtr.reg); + mCurSp = mCurSp - 4; + texel.offset_ebp = mCurSp; + load(txPtr, texel, WRITE_BACK); + MOV_REG_TO_MEM(texel.reg, texel.offset_ebp, EBP); + regs.recycle(texel.reg); + regs.recycle(txPtr.reg); + } else { + Scratch scratches(registerFile()); + reg_t& s = parts.coords[i].s; + reg_t& t = parts.coords[i].t; + comment("reload s/t (multitexture or linear filtering)"); + s.reg = scratches.obtain(); + t.reg = scratches.obtain(); + mBuilderContext.Rctx = scratches.obtain(); + MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx); + CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]); + CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]); + + comment("compute repeat/clamp"); + int width = scratches.obtain(); + int height = scratches.obtain(); + int U = 0; + int V = 0; + // U and V will be stored onto the stack due to the limited register + reg_t reg_U, reg_V; + + CONTEXT_LOAD(width, generated_vars.texture[i].width); + CONTEXT_LOAD(height, generated_vars.texture[i].height); + 
scratches.recycle(mBuilderContext.Rctx); + + int FRAC_BITS = 0; + if (tmu.linear) { + // linear interpolation + if (tmu.format.size == 1) { + // for 8-bits textures, we can afford + // 7 bits of fractional precision at no + // additional cost (we can't do 8 bits + // because filter8 uses signed 16 bits muls) + FRAC_BITS = 7; + } else if (tmu.format.size == 2) { + // filter16() is internally limited to 4 bits, so: + // FRAC_BITS=2 generates less instructions, + // FRAC_BITS=3,4,5 creates unpleasant artifacts, + // FRAC_BITS=6+ looks good + FRAC_BITS = 6; + } else if (tmu.format.size == 4) { + // filter32() is internally limited to 8 bits, so: + // FRAC_BITS=4 looks good + // FRAC_BITS=5+ looks better, but generates 3 extra ipp + FRAC_BITS = 6; + } else { + // for all other cases we use 4 bits. + FRAC_BITS = 4; + } + } + int u = scratches.obtain(); + // s.reg and t.reg are recycled in wrapping + wrapping(u, s.reg, width, tmu.swrap, FRAC_BITS, scratches); + int v = scratches.obtain(); + wrapping(v, t.reg, height, tmu.twrap, FRAC_BITS, scratches); + + + if (tmu.linear) { + + //mBuilderContext.Rctx = scratches.obtain(); + //MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx); + //CONTEXT_LOAD(width, generated_vars.texture[i].width); + //CONTEXT_LOAD(height, generated_vars.texture[i].height); + //scratches.recycle(mBuilderContext.Rctx); + + comment("compute linear filtering offsets"); + // pixel size scale + const int shift = 31 - gglClz(tmu.format.size); + U = scratches.obtain(); + V = scratches.obtain(); + + + // sample the texel center + SUB_IMM_TO_REG(1<<(FRAC_BITS-1), u); + SUB_IMM_TO_REG(1<<(FRAC_BITS-1), v); + + // get the fractionnal part of U,V + MOV_REG_TO_REG(u, U); + AND_IMM_TO_REG((1<<FRAC_BITS)-1, U); + MOV_REG_TO_REG(v, V); + AND_IMM_TO_REG((1<<FRAC_BITS)-1, V); + + // below we will pop U and V in the filter function + mCurSp = mCurSp - 4; + MOV_REG_TO_MEM(U, mCurSp, EBP); + reg_U.offset_ebp = mCurSp; + mCurSp = mCurSp - 4; + MOV_REG_TO_MEM(V, mCurSp, 
EBP); + reg_V.offset_ebp = mCurSp; + + scratches.recycle(U); + scratches.recycle(V); + + // compute width-1 and height-1 + SUB_IMM_TO_REG(1, width); + SUB_IMM_TO_REG(1, height); + + // the registers are used up + int temp1 = scratches.obtain(); + int temp2 = scratches.obtain(); + // get the integer part of U,V and clamp/wrap + // and compute offset to the next texel + if (tmu.swrap == GGL_NEEDS_WRAP_REPEAT) { + // u has already been REPEATed + SAR(FRAC_BITS, u); + CMOV_REG_TO_REG(Mnemonic_CMOVS, width, u); + MOV_IMM_TO_REG(1<<shift, temp1); + MOV_REG_TO_REG(width, temp2); + // SHL may pollute the CF flag + SHL(shift, temp2); + mCurSp = mCurSp - 4; + int width_offset_ebp = mCurSp; + // width will be changed after the first comparison + MOV_REG_TO_MEM(width, width_offset_ebp, EBP); + CMP_REG_TO_REG(width, u); + CMOV_REG_TO_REG(Mnemonic_CMOVL, temp1, width); + if (shift) { + CMOV_REG_TO_REG(Mnemonic_CMOVGE, temp2, width); + } + MOV_REG_TO_REG(width, temp1); + NEG(temp1); + // width is actually changed + CMP_MEM_TO_REG(EBP, width_offset_ebp, u); + CMOV_REG_TO_REG(Mnemonic_CMOVGE, temp1, width); + } else { + // u has not been CLAMPed yet + // algorithm: + // if ((u>>4) >= width) + // u = width<<4 + // width = 0 + // else + // width = 1<<shift + // u = u>>4; // get integer part + // if (u<0) + // u = 0 + // width = 0 + // generated_vars.rt = width + + MOV_REG_TO_REG(width, temp2); + SHL(FRAC_BITS, temp2); + MOV_REG_TO_REG(u, temp1); + SAR(FRAC_BITS, temp1); + CMP_REG_TO_REG(temp1, width); + CMOV_REG_TO_REG(Mnemonic_CMOVLE, temp2, u); + // mov doesn't affect the flags + MOV_IMM_TO_REG(0, temp2); + CMOV_REG_TO_REG(Mnemonic_CMOVLE, temp2, width); + MOV_IMM_TO_REG(1 << shift, temp2); + CMOV_REG_TO_REG(Mnemonic_CMOVG, temp2, width); + + MOV_IMM_TO_REG(0, temp2); + SAR(FRAC_BITS, u); + CMOV_REG_TO_REG(Mnemonic_CMOVS, temp2, u); + CMOV_REG_TO_REG(Mnemonic_CMOVS, temp2, width); + } + scratches.recycle(temp1); + scratches.recycle(temp2); + mBuilderContext.Rctx = 
scratches.obtain(); + MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx); + CONTEXT_STORE(width, generated_vars.rt); + + const int stride = width; + CONTEXT_LOAD(stride, generated_vars.texture[i].stride); + scratches.recycle(mBuilderContext.Rctx); + + temp1 = scratches.obtain(); + temp2 = scratches.obtain(); + + int height_offset_ebp; + if (tmu.twrap == GGL_NEEDS_WRAP_REPEAT) { + // v has already been REPEATed + SAR(FRAC_BITS, v); + CMOV_REG_TO_REG(Mnemonic_CMOVS, height, v); + MOV_IMM_TO_REG(1<<shift, temp1); + MOV_REG_TO_REG(height, temp2); + SHL(shift, temp2); + mCurSp = mCurSp - 4; + height_offset_ebp = mCurSp; + // height will be changed after the first comparison + MOV_REG_TO_MEM(height, height_offset_ebp, EBP); + CMP_REG_TO_REG(height, v); + CMOV_REG_TO_REG(Mnemonic_CMOVL, temp1, height); + if (shift) { + CMOV_REG_TO_REG(Mnemonic_CMOVGE, temp2, height); + } + MOV_REG_TO_REG(height, temp1); + NEG(temp1); + // height is actually changed + CMP_MEM_TO_REG(EBP, height_offset_ebp, v); + CMOV_REG_TO_REG(Mnemonic_CMOVGE, temp1, height); + IMUL(stride, height); + } else { + // u has not been CLAMPed yet + MOV_REG_TO_REG(height, temp2); + SHL(FRAC_BITS, temp2); + MOV_REG_TO_REG(v, temp1); + SAR(FRAC_BITS, temp1); + + mCurSp = mCurSp - 4; + height_offset_ebp = mCurSp; + // height may be changed after the first comparison + MOV_REG_TO_MEM(height, height_offset_ebp, EBP); + + CMP_REG_TO_REG(temp1, height); + CMOV_REG_TO_REG(Mnemonic_CMOVLE, temp2, v); + MOV_IMM_TO_REG(0, temp2); + CMOV_REG_TO_REG(Mnemonic_CMOVLE, temp2, height); + + if (shift) { + // stride = width. 
It's not used + // shift may pollute the flags + SHL(shift, stride); + // height may be changed to 0 + CMP_REG_TO_MEM(temp1, height_offset_ebp, EBP); + CMOV_REG_TO_REG(Mnemonic_CMOVG, stride, height); + } else { + CMOV_REG_TO_REG(Mnemonic_CMOVG, stride, height); + } + MOV_IMM_TO_REG(0, temp2); + SAR(FRAC_BITS, v); + CMOV_REG_TO_REG(Mnemonic_CMOVS, temp2, v); + CMOV_REG_TO_REG(Mnemonic_CMOVS, temp2, height); + } + scratches.recycle(temp1); + scratches.recycle(temp2); + mBuilderContext.Rctx = scratches.obtain(); + MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx); + CONTEXT_STORE(height, generated_vars.lb); + scratches.recycle(mBuilderContext.Rctx); + } + + scratches.recycle(width); + scratches.recycle(height); + + // iterate texture coordinates... + comment("iterate s,t"); + int dsdx = scratches.obtain(); + s.reg = scratches.obtain(); + mBuilderContext.Rctx = scratches.obtain(); + MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx); + CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx); + CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]); + ADD_REG_TO_REG(dsdx, s.reg); + CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]); + scratches.recycle(s.reg); + scratches.recycle(dsdx); + int dtdx = scratches.obtain(); + t.reg = scratches.obtain(); + CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx); + CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]); + ADD_REG_TO_REG(dtdx, t.reg); + CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]); + scratches.recycle(dtdx); + scratches.recycle(t.reg); + + // merge base & offset... 
+ comment("merge base & offset"); + texel.setTo(scratches.obtain(), &tmu.format); + //txPtr.setTo(texel.reg, tmu.bits); + txPtr.setTo(scratches.obtain(), tmu.bits); + int stride = scratches.obtain(); + CONTEXT_LOAD(stride, generated_vars.texture[i].stride); + CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].data); + scratches.recycle(mBuilderContext.Rctx); + MOVSX_REG_TO_REG(OpndSize_16, v, v); + MOVSX_REG_TO_REG(OpndSize_16, stride, stride); + IMUL(v, stride); + ADD_REG_TO_REG(stride, u);// u+v*stride + temp_reg_t.setTo(u); + base_offset(txPtr, txPtr, temp_reg_t); + + // recycle registers we don't need anymore + scratches.recycle(u); + scratches.recycle(v); + scratches.recycle(stride); + + mCurSp = mCurSp - 4; + texel.offset_ebp = mCurSp; + // load texel + if (!tmu.linear) { + comment("fetch texel in building texture"); + load(txPtr, texel, 0); + MOV_REG_TO_MEM(texel.reg, texel.offset_ebp, EBP); + scratches.recycle(texel.reg); + scratches.recycle(txPtr.reg); + } else { + comment("fetch texel, bilinear"); + // the registes are not enough. 
We spill texel and previous U and V + // texel.reg is recycled in the following functions since there are more than one code path + switch (tmu.format.size) { + case 1: + filter8(parts, texel, tmu, reg_U, reg_V, txPtr, FRAC_BITS, scratches); + break; + case 2: + filter16(parts, texel, tmu, reg_U, reg_V, txPtr, FRAC_BITS, scratches); + break; + case 3: + filter24(parts, texel, tmu, U, V, txPtr, FRAC_BITS); + break; + case 4: + filter32(parts, texel, tmu, reg_U, reg_V, txPtr, FRAC_BITS, scratches); + break; + } + } + } + } +} + +void GGLX86Assembler::build_iterate_texture_coordinates( + const fragment_parts_t& parts) +{ + const bool multiTexture = mTextureMachine.activeUnits > 1; + for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) { + const texture_unit_t& tmu = mTextureMachine.tmu[i]; + if (tmu.format_idx == 0) + continue; + + if ((tmu.swrap == GGL_NEEDS_WRAP_11) && + (tmu.twrap == GGL_NEEDS_WRAP_11)) + { // 1:1 textures + const pointer_t& txPtr = parts.coords[i].ptr; + ADD_IMM_TO_MEM(txPtr.size>>3, txPtr.offset_ebp, EBP); + } else { + Scratch scratches(registerFile()); + int s = parts.coords[i].s.reg; + int t = parts.coords[i].t.reg; + mBuilderContext.Rctx = scratches.obtain(); + MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx); + s = scratches.obtain(); + int dsdx = scratches.obtain(); + CONTEXT_LOAD(s, generated_vars.texture[i].spill[0]); + CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx); + ADD_REG_TO_REG(dsdx, s); + CONTEXT_STORE(s, generated_vars.texture[i].spill[0]); + scratches.recycle(s); + scratches.recycle(dsdx); + int dtdx = scratches.obtain(); + t = scratches.obtain(); + CONTEXT_LOAD(t, generated_vars.texture[i].spill[1]); + CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx); + ADD_REG_TO_REG(dtdx, t); + CONTEXT_STORE(t, generated_vars.texture[i].spill[1]); + scratches.recycle(t); + scratches.recycle(dtdx); + } + } +} + +void GGLX86Assembler::filter8( + const fragment_parts_t& parts, + pixel_t& texel, const texture_unit_t& tmu, + reg_t reg_U, 
reg_t reg_V, pointer_t& txPtr, + int FRAC_BITS, Scratch& scratches) +{ + if (tmu.format.components != GGL_ALPHA && + tmu.format.components != GGL_LUMINANCE) + { + // this is a packed format, and we don't support + // linear filtering (it's probably RGB 332) + // Should not happen with OpenGL|ES + MOVZX_MEM_TO_REG(OpndSize_8, txPtr.reg, 0, texel.reg); + MOV_REG_TO_MEM(texel.reg, texel.offset_ebp, EBP); + scratches.recycle(texel.reg); + scratches.recycle(txPtr.reg); + return; + } + + // ------------------------ + + //int d = scratches.obtain(); + //int u = scratches.obtain(); + //int k = scratches.obtain(); + + scratches.recycle(texel.reg); + int rt = scratches.obtain(); + int lb = scratches.obtain(); + + // RB -> U * V + + mBuilderContext.Rctx = scratches.obtain(); + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); + CONTEXT_LOAD(rt, generated_vars.rt); + CONTEXT_LOAD(lb, generated_vars.lb); + scratches.recycle(mBuilderContext.Rctx); + int pixel= scratches.obtain(); + + int offset = pixel; + + MOV_REG_TO_REG(rt, offset); + ADD_REG_TO_REG(lb, offset); + + int temp_reg1 = scratches.obtain(); + int temp_reg2 = scratches.obtain(); + // it seems that the address mode with base and scale reg cannot be encoded correctly + //MOV_MEM_SCALE_TO_REG(txPtr.reg, offset, 1, temp_reg1, OpndSize_8); + ADD_REG_TO_REG(txPtr.reg, offset); + MOVZX_MEM_TO_REG(OpndSize_8, offset, 0, temp_reg1); + // pixel is only 8-bits + MOV_REG_TO_REG(temp_reg1, pixel); + MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_U.offset_ebp, temp_reg1); + MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_V.offset_ebp, temp_reg2); + IMUL(temp_reg2, temp_reg1); + MOVSX_REG_TO_REG(OpndSize_16, pixel, pixel); + MOVSX_REG_TO_REG(OpndSize_16, temp_reg1, temp_reg2); + IMUL(temp_reg2, pixel); + NEG(temp_reg1); + ADD_IMM_TO_REG(1<<(FRAC_BITS*2), temp_reg1); + mCurSp = mCurSp - 4; + int d_offset_ebp = mCurSp; + MOV_REG_TO_MEM(pixel, d_offset_ebp, EBP); + mCurSp = mCurSp - 4; + int k_offset_ebp = mCurSp; + MOV_REG_TO_MEM(temp_reg1, k_offset_ebp, 
EBP); + + + // LB -> (1-U) * V + MOV_MEM_TO_REG(reg_U.offset_ebp, EBP, temp_reg2); + NEG(temp_reg2); + ADD_IMM_TO_REG(1<<FRAC_BITS, temp_reg2); + MOV_REG_TO_MEM(temp_reg2, reg_U.offset_ebp, EBP); + + //MOV_MEM_SCALE_TO_REG(txPtr.reg, lb, 1, pixel, OpndSize_8); + ADD_REG_TO_REG(txPtr.reg, lb); + MOVZX_MEM_TO_REG(OpndSize_8, lb, 0, pixel); + + MOVSX_REG_TO_REG(OpndSize_16, temp_reg2, temp_reg2); + MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_V.offset_ebp, temp_reg1); + IMUL(temp_reg1, temp_reg2); + MOVSX_REG_TO_REG(OpndSize_16, pixel, pixel); + MOVSX_REG_TO_REG(OpndSize_16, temp_reg2, temp_reg1); + IMUL(pixel, temp_reg1); + ADD_REG_TO_MEM(temp_reg1, EBP, d_offset_ebp); + SUB_REG_TO_MEM(temp_reg2, EBP, k_offset_ebp); + + + // LT -> (1-U)*(1-V) + MOV_MEM_TO_REG(reg_V.offset_ebp, EBP, temp_reg2); + NEG(temp_reg2); + ADD_IMM_TO_REG(1<<FRAC_BITS, temp_reg2); + MOV_REG_TO_MEM(temp_reg2, reg_V.offset_ebp, EBP); + + MOVZX_MEM_TO_REG(OpndSize_8, txPtr.reg, 0, pixel); + + MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_U.offset_ebp, temp_reg1); + MOVSX_REG_TO_REG(OpndSize_16, temp_reg2, temp_reg2); + IMUL(temp_reg1, temp_reg2); + MOVSX_REG_TO_REG(OpndSize_16, temp_reg2, temp_reg1); + MOVSX_REG_TO_REG(OpndSize_16, pixel, pixel); + IMUL(pixel, temp_reg1); + ADD_REG_TO_MEM(temp_reg1, EBP, d_offset_ebp); + + // RT -> U*(1-V) + //MOV_MEM_SCALE_TO_REG(txPtr.reg, rt, 1, pixel, OpndSize_8); + ADD_REG_TO_REG(txPtr.reg, rt); + MOVZX_MEM_TO_REG(OpndSize_8, rt, 0, pixel); + + int k = rt; + MOV_MEM_TO_REG(k_offset_ebp, EBP, k); + SUB_REG_TO_REG(temp_reg2, k); + MOVSX_REG_TO_REG(OpndSize_16, pixel, pixel); + MOVSX_REG_TO_REG(OpndSize_16, k, k); + IMUL(pixel, k); + ADD_MEM_TO_REG(EBP, d_offset_ebp, k); + MOV_REG_TO_MEM(k, texel.offset_ebp, EBP); + scratches.recycle(rt); + scratches.recycle(lb); + scratches.recycle(pixel); + scratches.recycle(txPtr.reg); + scratches.recycle(temp_reg1); + scratches.recycle(temp_reg2); + for (int i=0 ; i<4 ; i++) { + if (!texel.format.c[i].h) continue; + texel.format.c[i].h = 
FRAC_BITS*2+8; + texel.format.c[i].l = FRAC_BITS*2; // keeping 8 bits in enough + } + texel.format.size = 4; + texel.format.bitsPerPixel = 32; + texel.flags |= CLEAR_LO; +} + +void GGLX86Assembler::filter16( + const fragment_parts_t& parts, + pixel_t& texel, const texture_unit_t& tmu, + reg_t reg_U, reg_t reg_V, pointer_t& txPtr, + int FRAC_BITS, Scratch& scratches) +{ + // compute the mask + // XXX: it would be nice if the mask below could be computed + // automatically. + uint32_t mask = 0; + int shift = 0; + int prec = 0; + switch (tmu.format_idx) { + case GGL_PIXEL_FORMAT_RGB_565: + // source: 00000ggg.ggg00000 | rrrrr000.000bbbbb + // result: gggggggg.gggrrrrr | rrrrr0bb.bbbbbbbb + mask = 0x07E0F81F; + shift = 16; + prec = 5; + break; + case GGL_PIXEL_FORMAT_RGBA_4444: + // 0000,1111,0000,1111 | 0000,1111,0000,1111 + mask = 0x0F0F0F0F; + shift = 12; + prec = 4; + break; + case GGL_PIXEL_FORMAT_LA_88: + // 0000,0000,1111,1111 | 0000,0000,1111,1111 + // AALL -> 00AA | 00LL + mask = 0x00FF00FF; + shift = 8; + prec = 8; + break; + default: + // unsupported format, do something sensical... + ALOGE("Unsupported 16-bits texture format (%d)", tmu.format_idx); + MOVZX_MEM_TO_REG(OpndSize_16, txPtr.reg, 0, texel.reg); + MOV_REG_TO_MEM(texel.reg, texel.offset_ebp, EBP); + scratches.recycle(texel.reg); + scratches.recycle(txPtr.reg); + return; + } + + const int adjust = FRAC_BITS*2 - prec; + const int round = 0; + + // update the texel format + texel.format.size = 4; + texel.format.bitsPerPixel = 32; + texel.flags |= CLEAR_HI|CLEAR_LO; + for (int i=0 ; i<4 ; i++) { + if (!texel.format.c[i].h) continue; + const uint32_t offset = (mask & tmu.format.mask(i)) ? 
0 : shift; + texel.format.c[i].h = tmu.format.c[i].h + offset + prec; + texel.format.c[i].l = texel.format.c[i].h - (tmu.format.bits(i) + prec); + } + + // ------------------------ + + scratches.recycle(texel.reg); + + int pixel= scratches.obtain(); + int u = scratches.obtain(); + int temp_reg1 = scratches.obtain(); + + // RB -> U * V + //printf("RB -> U * V \n"); + int offset = pixel; + mBuilderContext.Rctx = scratches.obtain(); + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); + CONTEXT_LOAD(offset, generated_vars.rt); + CONTEXT_LOAD(u, generated_vars.lb); + ADD_REG_TO_REG(u, offset); + + //MOV_MEM_SCALE_TO_REG(txPtr.reg, offset, 1, temp_reg1, OpndSize_16); + ADD_REG_TO_REG(txPtr.reg, offset); + MOVZX_MEM_TO_REG(OpndSize_16, offset, 0, temp_reg1); + + MOV_REG_TO_REG(temp_reg1, pixel); + + MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_U.offset_ebp, u); + MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_V.offset_ebp, temp_reg1); + IMUL(temp_reg1, u); + MOV_REG_TO_REG(pixel, temp_reg1); + SHL(shift, temp_reg1); + OR_REG_TO_REG(temp_reg1, pixel); + build_and_immediate(pixel, pixel, mask, 32); + if (adjust) { + if (round) + ADD_IMM_TO_REG(1<<(adjust-1), u); + SHR(adjust, u); + } + int d = scratches.obtain(); + MOV_REG_TO_REG(u, d); + IMUL(pixel, d); + NEG(u); + ADD_IMM_TO_REG(1<<prec, u); + + + // LB -> (1-U) * V + //printf("LB -> (1- U) * V \n"); + MOV_MEM_TO_REG(reg_U.offset_ebp, EBP, temp_reg1); + NEG(temp_reg1); + ADD_IMM_TO_REG(1<<FRAC_BITS, temp_reg1); + MOV_REG_TO_MEM(temp_reg1, reg_U.offset_ebp, EBP); + MOVSX_REG_TO_REG(OpndSize_16, temp_reg1, temp_reg1); + + CONTEXT_LOAD(offset, generated_vars.lb); + scratches.recycle(mBuilderContext.Rctx); + //MOV_MEM_SCALE_TO_REG(txPtr.reg, offset, 1, pixel, OpndSize_16); + ADD_REG_TO_REG(txPtr.reg, offset); + MOVZX_MEM_TO_REG(OpndSize_16, offset, 0, pixel); + + int temp_reg2 = scratches.obtain(); + MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_V.offset_ebp, temp_reg2); + IMUL(temp_reg1, temp_reg2); + MOV_REG_TO_REG(pixel, temp_reg1); + SHL(shift, 
temp_reg1); + OR_REG_TO_REG(temp_reg1, pixel); + build_and_immediate(pixel, pixel, mask, 32); + if (adjust) { + if (round) + ADD_IMM_TO_REG(1<<(adjust-1), temp_reg2); + SHR(adjust, temp_reg2); + } + IMUL(temp_reg2, pixel); + ADD_REG_TO_REG(pixel, d); + SUB_REG_TO_REG(temp_reg2, u); + + + // LT -> (1-U)*(1-V) + //printf("LT -> (1- U)*(1-V) \n"); + MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_V.offset_ebp, temp_reg2); + NEG(temp_reg2); + ADD_IMM_TO_REG(1<<FRAC_BITS, temp_reg2); + MOV_REG_TO_MEM(temp_reg2, reg_V.offset_ebp, EBP); + MOVZX_MEM_TO_REG(OpndSize_16, txPtr.reg, 0, pixel); + MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_U.offset_ebp, temp_reg1); + IMUL(temp_reg1, temp_reg2); + MOV_REG_TO_REG(pixel, temp_reg1); + SHL(shift, temp_reg1); + OR_REG_TO_REG(temp_reg1, pixel); + build_and_immediate(pixel, pixel, mask, 32); + if (adjust) { + if (round) + ADD_IMM_TO_REG(1<<(adjust-1), temp_reg2); + SHR(adjust, temp_reg2); + } + IMUL(temp_reg2, pixel); + ADD_REG_TO_REG(pixel, d); + + + // RT -> U*(1-V) + //printf("RT -> U*(1-V) \n"); + SUB_REG_TO_REG(temp_reg2, u); + mBuilderContext.Rctx = temp_reg2; + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); + CONTEXT_LOAD(temp_reg1, generated_vars.rt); + //MOV_MEM_SCALE_TO_REG(txPtr.reg, temp_reg1, 1, pixel, OpndSize_16); + ADD_REG_TO_REG(txPtr.reg, temp_reg1); + MOVZX_MEM_TO_REG(OpndSize_16, temp_reg1, 0, pixel); + + MOV_REG_TO_REG(pixel, temp_reg1); + SHL(shift, temp_reg1); + OR_REG_TO_REG(temp_reg1, pixel); + build_and_immediate(pixel, pixel, mask, 32); + IMUL(u, pixel); + ADD_REG_TO_REG(pixel, d); + MOV_REG_TO_MEM(d, texel.offset_ebp, EBP); + scratches.recycle(d); + scratches.recycle(pixel); + scratches.recycle(u); + scratches.recycle(txPtr.reg); + scratches.recycle(temp_reg1); + scratches.recycle(temp_reg2); +} + +void GGLX86Assembler::filter24( + const fragment_parts_t& parts, + pixel_t& texel, const texture_unit_t& tmu, + int U, int V, pointer_t& txPtr, + int FRAC_BITS) +{ + // not supported yet (currently disabled) + load(txPtr, 
texel, 0); +} + +void GGLX86Assembler::filter32( + const fragment_parts_t& parts, + pixel_t& texel, const texture_unit_t& tmu, + reg_t reg_U, reg_t reg_V, pointer_t& txPtr, + int FRAC_BITS, Scratch& scratches) +{ + const int adjust = FRAC_BITS*2 - 8; + const int round = 0; + + // ------------------------ + scratches.recycle(texel.reg); + int mask = scratches.obtain(); + int pixel= scratches.obtain(); + int u = scratches.obtain(); + + //int dh = scratches.obtain(); + //int k = scratches.obtain(); + //int temp = scratches.obtain(); + //int dl = scratches.obtain(); + + MOV_IMM_TO_REG(0xFF, mask); + OR_IMM_TO_REG(0xFF0000, mask); + + // RB -> U * V + int offset = pixel; + mBuilderContext.Rctx = scratches.obtain(); + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); + CONTEXT_LOAD(offset, generated_vars.rt); + CONTEXT_LOAD(u, generated_vars.lb); + ADD_REG_TO_REG(u, offset); + scratches.recycle(mBuilderContext.Rctx); + + //MOV_MEM_SCALE_TO_REG(txPtr.reg, offset, 1, u); + ADD_REG_TO_REG(txPtr.reg, offset); + MOV_MEM_TO_REG(0, offset, u); + + MOV_REG_TO_REG(u, pixel); + + int temp_reg1 = scratches.obtain(); + MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_U.offset_ebp, temp_reg1); + MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_V.offset_ebp, u); + IMUL(temp_reg1, u); + MOV_REG_TO_REG(mask, temp_reg1); + AND_REG_TO_REG(pixel, temp_reg1); + if (adjust) { + if (round) + ADD_IMM_TO_REG(1<<(adjust-1), u); + SHR(adjust, u); + } + int temp_reg2 = scratches.obtain(); + MOV_REG_TO_REG(temp_reg1, temp_reg2); + IMUL(u, temp_reg2); + SHR(8, pixel); + AND_REG_TO_REG(mask, pixel); + IMUL(u, pixel); + NEG(u); + ADD_IMM_TO_REG(0x100, u); + mCurSp = mCurSp - 4; + int dh_offset_ebp = mCurSp; + MOV_REG_TO_MEM(temp_reg2, dh_offset_ebp, EBP); + mCurSp = mCurSp - 4; + int dl_offset_ebp = mCurSp; + MOV_REG_TO_MEM(pixel, dl_offset_ebp, EBP); + + // LB -> (1-U) * V + mBuilderContext.Rctx = temp_reg2; + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); + CONTEXT_LOAD(offset, generated_vars.lb); + 
//MOV_MEM_SCALE_TO_REG(txPtr.reg, offset, 1, temp_reg2); + ADD_REG_TO_REG(txPtr.reg, offset); + MOV_MEM_TO_REG(0, offset, temp_reg2); + + MOV_REG_TO_REG(temp_reg2, pixel); + MOV_MEM_TO_REG(reg_U.offset_ebp, EBP, temp_reg1); + NEG(temp_reg1); + ADD_IMM_TO_REG(1<<FRAC_BITS, temp_reg1); + MOV_REG_TO_MEM(temp_reg1, reg_U.offset_ebp, EBP); + MOVSX_REG_TO_REG(OpndSize_16, temp_reg1, temp_reg1); + MOVSX_MEM_TO_REG(OpndSize_16, EBP, reg_V.offset_ebp, temp_reg2); + IMUL(temp_reg2, temp_reg1); + MOV_REG_TO_REG(mask, temp_reg2); + AND_REG_TO_REG(pixel, temp_reg2); + if (adjust) { + if (round) + ADD_IMM_TO_REG(1<<(adjust-1), temp_reg1); + SHR(adjust, temp_reg1); + } + // if we use push and pop txPtr.reg later, It will cause the bad locality, since the esp is already been subtracted before the loop. + // we will spill txPtr.reg due to the limited register + mCurSp = mCurSp - 4; + int txPtr_offset_ebp = mCurSp; + MOV_REG_TO_MEM(txPtr.reg, txPtr_offset_ebp, EBP); + //PUSH(txPtr.reg); + + int temp_reg3 = txPtr.reg; + MOV_REG_TO_REG(temp_reg2, temp_reg3); + IMUL(temp_reg1, temp_reg3); + ADD_REG_TO_MEM(temp_reg3, EBP, dh_offset_ebp); + SHR(8, pixel); + AND_REG_TO_REG(mask, pixel); + IMUL(temp_reg1, pixel); + ADD_REG_TO_MEM(pixel, EBP, dl_offset_ebp); + SUB_REG_TO_REG(temp_reg1, u); + + + // LT -> (1-U)*(1-V) + MOV_MEM_TO_REG(reg_V.offset_ebp, EBP, temp_reg1); + NEG(temp_reg1); + ADD_IMM_TO_REG(1<<FRAC_BITS, temp_reg1); + MOV_REG_TO_MEM(temp_reg1, reg_V.offset_ebp, EBP); + MOV_MEM_TO_REG(reg_U.offset_ebp, EBP, temp_reg2); + + MOV_MEM_TO_REG(txPtr_offset_ebp, EBP, txPtr.reg); + //POP(txPtr.reg); + + MOV_MEM_TO_REG(0, txPtr.reg, pixel); + IMUL(temp_reg2, temp_reg1); + //we have already saved txPtr.reg + temp_reg3 = txPtr.reg; + MOV_REG_TO_REG(pixel, temp_reg3); + AND_REG_TO_REG(mask, temp_reg3); + if (adjust) { + if (round) + ADD_IMM_TO_REG(1<<(adjust-1), temp_reg1); + SHR(adjust, temp_reg1); + } + IMUL(temp_reg1, temp_reg3); + ADD_REG_TO_MEM(temp_reg3, EBP, dh_offset_ebp); + SHR(8, 
pixel); + AND_REG_TO_REG(mask, pixel); + IMUL(temp_reg1, pixel); + ADD_REG_TO_MEM(pixel, EBP, dl_offset_ebp); + + // RT -> U*(1-V) + SUB_REG_TO_REG(temp_reg1, u); + mBuilderContext.Rctx = temp_reg2; + MOV_MEM_TO_REG(8, EBP, mBuilderContext.Rctx); + CONTEXT_LOAD(offset, generated_vars.rt); + + MOV_MEM_TO_REG(txPtr_offset_ebp, EBP, txPtr.reg); + //POP(txPtr.reg); + + //MOV_MEM_SCALE_TO_REG(txPtr.reg, offset, 1, temp_reg2); + ADD_REG_TO_REG(txPtr.reg, offset); + MOV_MEM_TO_REG(0, offset, temp_reg2); + + MOV_REG_TO_REG(temp_reg2, pixel); + AND_REG_TO_REG(mask, temp_reg2); + IMUL(u, temp_reg2); + ADD_REG_TO_MEM(temp_reg2, EBP, dh_offset_ebp); + SHR(8, pixel); + AND_REG_TO_REG(mask, pixel); + IMUL(u, pixel); + ADD_REG_TO_MEM(pixel, EBP, dl_offset_ebp); + MOV_MEM_TO_REG(dh_offset_ebp, EBP, temp_reg1); + MOV_MEM_TO_REG(dl_offset_ebp, EBP, temp_reg2); + SHR(8, temp_reg1); + AND_REG_TO_REG(mask, temp_reg1); + SHL(8, mask); + AND_REG_TO_REG(mask, temp_reg2); + OR_REG_TO_REG(temp_reg1, temp_reg2); + MOV_REG_TO_MEM(temp_reg2, texel.offset_ebp, EBP); + scratches.recycle(u); + scratches.recycle(mask); + scratches.recycle(pixel); + scratches.recycle(txPtr.reg); + scratches.recycle(temp_reg1); + scratches.recycle(temp_reg2); + +} + +void GGLX86Assembler::build_texture_environment( + component_t& fragment, + fragment_parts_t& parts, + int component, + Scratch& regs) +{ + const uint32_t component_mask = 1<<component; + const bool multiTexture = mTextureMachine.activeUnits > 1; + Scratch scratches(registerFile()); + for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) { + texture_unit_t& tmu = mTextureMachine.tmu[i]; + + if (tmu.mask & component_mask) { + // replace or modulate with this texture + if ((tmu.replaced & component_mask) == 0) { + // not replaced by a later tmu... 
+ + pixel_t texel(parts.texel[i]); + if (multiTexture && + tmu.swrap == GGL_NEEDS_WRAP_11 && + tmu.twrap == GGL_NEEDS_WRAP_11) + { + texel.reg = scratches.obtain(); + texel.flags |= CORRUPTIBLE; + mCurSp = mCurSp - 4; + texel.offset_ebp = mCurSp; + comment("fetch texel (multitexture 1:1)"); + parts.coords[i].ptr.reg = scratches.obtain(); + MOV_MEM_TO_REG(parts.coords[i].ptr.offset_ebp, EBP, parts.coords[i].ptr.reg); + load(parts.coords[i].ptr, texel, WRITE_BACK); + MOV_REG_TO_MEM(texel.reg, texel.offset_ebp, EBP); + scratches.recycle(parts.coords[i].ptr.reg); + } else { + // the texel is already loaded in building textures + texel.reg = scratches.obtain(); + MOV_MEM_TO_REG(texel.offset_ebp, EBP, texel.reg); + } + + component_t incoming(fragment); + modify(fragment, regs); + + switch (tmu.env) { + case GGL_REPLACE: + extract(fragment, texel, component); + break; + case GGL_MODULATE: + modulate(fragment, incoming, texel, component); + break; + case GGL_DECAL: + decal(fragment, incoming, texel, component); + break; + case GGL_BLEND: + blend(fragment, incoming, texel, component, i); + break; + case GGL_ADD: + add(fragment, incoming, texel, component); + break; + } + scratches.recycle(texel.reg); + } + } + } +} + +// --------------------------------------------------------------------------- + +void GGLX86Assembler::wrapping( + int d, + int coord, int size, + int tx_wrap, int tx_linear, Scratch& scratches) +{ + // coord is recycled after return, so it can be written. 
+ // notes: + // if tx_linear is set, we need 4 extra bits of precision on the result + // SMULL/UMULL is 3 cycles + // coord is actually s.reg or t.reg which will not be used + int c = coord; + if (tx_wrap == GGL_NEEDS_WRAP_REPEAT) { + // UMULL takes 4 cycles (interlocked), and we can get away with + // 2 cycles using SMULWB, but we're loosing 16 bits of precision + // out of 32 (this is not a problem because the iterator keeps + // its full precision) + // UMULL(AL, 0, size, d, c, size); + // note: we can't use SMULTB because it's signed. + MOV_REG_TO_REG(c, d); + SHR(16-tx_linear, d); + int temp_reg; + if(c != EDX) + temp_reg = c; + else { + temp_reg = scratches.obtain(); + scratches.recycle(c); + } + int flag_push_edx = -1; + int flag_reserve_edx = -1; + int edx_offset_ebp = 0; + if(scratches.isUsed(EDX) == 1) { //not indicates that the registers are used up. Probably, previous allocated registers are recycled + if((d != EDX) && (size != EDX)) { + flag_push_edx = 1; + mCurSp = mCurSp - 4; + edx_offset_ebp = mCurSp; + MOV_REG_TO_MEM(EDX, edx_offset_ebp, EBP); + //PUSH(EDX); + } + } + else { + flag_reserve_edx = 1; + scratches.reserve(EDX); + } + if(scratches.isUsed(EAX)) { + if( size == EAX || d == EAX) { + // size is actually width and height, which will probably be used after wrapping + MOV_REG_TO_REG(size, temp_reg); + MOVSX_REG_TO_REG(OpndSize_16, size, size); + if(size == EAX) + IMUL(d); + else + IMUL(size); + SHL(16, EDX); + SHR(16, EAX); + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); + MOV_REG_TO_REG(EDX, d); + + MOV_REG_TO_REG(temp_reg, size); + } + else { + if(temp_reg != EAX) + MOV_REG_TO_REG(EAX, temp_reg); + MOV_REG_TO_REG(size, EAX); + MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX); + IMUL(d); + SHL(16, EDX); + SHR(16, EAX); + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); + MOV_REG_TO_REG(EDX, d); + if(temp_reg != EAX) + MOV_REG_TO_REG(temp_reg, EAX); + } + } + else { + MOV_REG_TO_REG(size, EAX); + MOVSX_REG_TO_REG(OpndSize_16, EAX, EAX); + IMUL(d); + SHL(16, EDX); + 
SHR(16, EAX); + MOV_REG_TO_REG(EAX, EDX, OpndSize_16); + MOV_REG_TO_REG(EDX, d); + } + if(flag_push_edx == 1) { + MOV_MEM_TO_REG(edx_offset_ebp, EBP, EDX); + //POP(EDX); + } + if(flag_reserve_edx ==1) + scratches.recycle(EDX); + + scratches.recycle(temp_reg); + //IMUL(size, d) will cause segmentation fault with GlobalTime + } else if (tx_wrap == GGL_NEEDS_WRAP_CLAMP_TO_EDGE) { + if (tx_linear) { + // 1 cycle + MOV_REG_TO_REG(coord, d); + SAR(16-tx_linear, d); + } else { + SAR(16, coord); + MOV_REG_TO_REG(coord, d); + SAR(31, coord); + NOT(coord); + AND_REG_TO_REG(coord, d); + + MOV_REG_TO_REG(size, coord); + SUB_IMM_TO_REG(1, coord); + + CMP_REG_TO_REG(size, d); + CMOV_REG_TO_REG(Mnemonic_CMOVGE, coord, d); + + } + scratches.recycle(coord); + } +} + +// --------------------------------------------------------------------------- + +void GGLX86Assembler::modulate( + component_t& dest, + const component_t& incoming, + const pixel_t& incomingTexel, int component) +{ + Scratch locals(registerFile()); + integer_t texel(locals.obtain(), 32, CORRUPTIBLE); + extract(texel, incomingTexel, component); + + const int Nt = texel.size(); + // Nt should always be less than 10 bits because it comes + // from the TMU. 
+ + int Ni = incoming.size(); + // Ni could be big because it comes from previous MODULATEs + + if (Nt == 1) { + // texel acts as a bit-mask + // dest = incoming & ((texel << incoming.h)-texel) + MOV_REG_TO_REG(texel.reg, dest.reg); + SHL(incoming.h, dest.reg); + SUB_REG_TO_REG(texel.reg, dest.reg); + dest.l = incoming.l; + dest.h = incoming.h; + dest.flags |= (incoming.flags & CLEAR_LO); + } else if (Ni == 1) { + SHL(31-incoming.h, incoming.reg); + MOV_REG_TO_REG(incoming.reg, dest.reg); + SAR(31, dest.reg); + AND_REG_TO_REG(texel.reg, dest.reg); + dest.l = 0; + dest.h = Nt; + } else { + int inReg = incoming.reg; + int shift = incoming.l; + if ((Nt + Ni) > 32) { + // we will overflow, reduce the precision of Ni to 8 bits + // (Note Nt cannot be more than 10 bits which happens with + // 565 textures and GGL_LINEAR) + shift += Ni-8; + Ni = 8; + } + + // modulate by the component with the lowest precision + if (Nt >= Ni) { + if (shift) { + // XXX: we should be able to avoid this shift + // when shift==16 && Nt<16 && Ni<16, in which + // we could use SMULBT below. 
+ MOV_REG_TO_REG(inReg, dest.reg); + SHR(shift, inReg); + inReg = dest.reg; + shift = 0; + } + int temp_reg = locals.obtain(); + // operation: (Cf*Ct)/((1<<Ni)-1) + // approximated with: Cf*(Ct + Ct>>(Ni-1))>>Ni + // this operation doesn't change texel's size + MOV_REG_TO_REG(inReg, temp_reg); + SHR(Ni-1, temp_reg); + MOV_REG_TO_REG(inReg, dest.reg); + ADD_REG_TO_REG(temp_reg, dest.reg); + locals.recycle(temp_reg); + if (Nt<16 && Ni<16) { + MOVSX_REG_TO_REG(OpndSize_16, texel.reg, texel.reg); + MOVSX_REG_TO_REG(OpndSize_16, dest.reg, dest.reg); + IMUL(texel.reg, dest.reg); + } + else + IMUL(texel.reg, dest.reg); + dest.l = Ni; + dest.h = Nt + Ni; + } else { + if (shift && (shift != 16)) { + // if shift==16, we can use 16-bits mul instructions later + MOV_REG_TO_REG(inReg, dest.reg); + SHR(shift, dest.reg); + inReg = dest.reg; + shift = 0; + } + // operation: (Cf*Ct)/((1<<Nt)-1) + // approximated with: Ct*(Cf + Cf>>(Nt-1))>>Nt + // this operation doesn't change incoming's size + Scratch scratches(registerFile()); + int temp_reg = locals.obtain(); + int t = (texel.flags & CORRUPTIBLE) ? 
texel.reg : dest.reg; + if (t == inReg) + t = scratches.obtain(); + + MOV_REG_TO_REG(texel.reg, temp_reg); + SHR(Nt-1, temp_reg); + ADD_REG_TO_REG(temp_reg, texel.reg); + MOV_REG_TO_REG(texel.reg, t); + locals.recycle(temp_reg); + MOV_REG_TO_REG(inReg, dest.reg); + if (Nt<16 && Ni<16) { + if (shift==16) { + MOVSX_REG_TO_REG(OpndSize_16, t, t); + SHR(16, dest.reg); + MOVSX_REG_TO_REG(OpndSize_16, dest.reg, dest.reg); + IMUL(t, dest.reg); + } + else { + MOVSX_REG_TO_REG(OpndSize_16, dest.reg, dest.reg); + MOVSX_REG_TO_REG(OpndSize_16, t, t); + IMUL(t, dest.reg); + } + } else + IMUL(t, dest.reg); + dest.l = Nt; + dest.h = Nt + Ni; + } + + // low bits are not valid + dest.flags |= CLEAR_LO; + + // no need to keep more than 8 bits/component + if (dest.size() > 8) + dest.l = dest.h-8; + } +} + +void GGLX86Assembler::decal( + component_t& dest, + const component_t& incoming, + const pixel_t& incomingTexel, int component) +{ + // RGBA: + // Cv = Cf*(1 - At) + Ct*At = Cf + (Ct - Cf)*At + // Av = Af + Scratch locals(registerFile()); + integer_t texel(locals.obtain(), 32, CORRUPTIBLE); + integer_t factor(locals.obtain(), 32, CORRUPTIBLE); + extract(texel, incomingTexel, component); + extract(factor, incomingTexel, GGLFormat::ALPHA); + + // no need to keep more than 8-bits for decal + int Ni = incoming.size(); + int shift = incoming.l; + if (Ni > 8) { + shift += Ni-8; + Ni = 8; + } + integer_t incomingNorm(incoming.reg, Ni, incoming.flags); + if (shift) { + SHR(shift, incomingNorm.reg); + MOV_REG_TO_REG(incomingNorm.reg, dest.reg); + incomingNorm.reg = dest.reg; + incomingNorm.flags |= CORRUPTIBLE; + } + int temp = locals.obtain(); + MOV_REG_TO_REG(factor.reg, temp); + SHR(factor.s-1, temp); + ADD_REG_TO_REG(temp, factor.reg); + locals.recycle(temp); + build_blendOneMinusFF(dest, factor, incomingNorm, texel); +} + +void GGLX86Assembler::blend( + component_t& dest, + const component_t& incoming, + const pixel_t& incomingTexel, int component, int tmu) +{ + // RGBA: + // Cv = (1 
- Ct)*Cf + Ct*Cc = Cf + (Cc - Cf)*Ct + // Av = At*Af + + if (component == GGLFormat::ALPHA) { + modulate(dest, incoming, incomingTexel, component); + return; + } + + Scratch locals(registerFile()); + int temp = locals.obtain(); + integer_t color(locals.obtain(), 8, CORRUPTIBLE); + integer_t factor(locals.obtain(), 32, CORRUPTIBLE); + mBuilderContext.Rctx = temp; + MOV_MEM_TO_REG(8, PhysicalReg_EBP, mBuilderContext.Rctx); + MOVZX_MEM_TO_REG(OpndSize_8, mBuilderContext.Rctx, GGL_OFFSETOF(state.texture[tmu].env_color[component]), color.reg); + extract(factor, incomingTexel, component); + + // no need to keep more than 8-bits for blend + int Ni = incoming.size(); + int shift = incoming.l; + if (Ni > 8) { + shift += Ni-8; + Ni = 8; + } + integer_t incomingNorm(incoming.reg, Ni, incoming.flags); + if (shift) { + MOV_REG_TO_REG(incomingNorm.reg, dest.reg); + SHR(shift, dest.reg); + incomingNorm.reg = dest.reg; + incomingNorm.flags |= CORRUPTIBLE; + } + MOV_REG_TO_REG(factor.reg, temp); + SHR(factor.s-1, temp); + ADD_REG_TO_REG(temp, factor.reg); + locals.recycle(temp); + build_blendOneMinusFF(dest, factor, incomingNorm, color); +} + +void GGLX86Assembler::add( + component_t& dest, + const component_t& incoming, + const pixel_t& incomingTexel, int component) +{ + // RGBA: + // Cv = Cf + Ct; + Scratch locals(registerFile()); + + component_t incomingTemp(incoming); + + // use "dest" as a temporary for extracting the texel, unless "dest" + // overlaps "incoming". 
+ integer_t texel(dest.reg, 32, CORRUPTIBLE); + if (dest.reg == incomingTemp.reg) + texel.reg = locals.obtain(); + extract(texel, incomingTexel, component); + + if (texel.s < incomingTemp.size()) { + expand(texel, texel, incomingTemp.size()); + } else if (texel.s > incomingTemp.size()) { + if (incomingTemp.flags & CORRUPTIBLE) { + expand(incomingTemp, incomingTemp, texel.s); + } else { + incomingTemp.reg = locals.obtain(); + expand(incomingTemp, incoming, texel.s); + } + } + + if (incomingTemp.l) { + MOV_REG_TO_REG(incomingTemp.reg, dest.reg); + SHR(incomingTemp.l, dest.reg); + ADD_REG_TO_REG(texel.reg, dest.reg); + } else { + MOV_REG_TO_REG(incomingTemp.reg, dest.reg); + ADD_REG_TO_REG(texel.reg, dest.reg); + } + dest.l = 0; + dest.h = texel.size(); + int temp_reg = locals.obtain(); + component_sat(dest, temp_reg); + locals.recycle(temp_reg); +} + +// ---------------------------------------------------------------------------- + +}; // namespace android diff --git a/libpixelflinger/pixelflinger.cpp b/libpixelflinger/pixelflinger.cpp index fd449b2..f06154f 100644 --- a/libpixelflinger/pixelflinger.cpp +++ b/libpixelflinger/pixelflinger.cpp @@ -32,7 +32,11 @@ #include "scanline.h" #include "trap.h" +#if defined(__i386__) || defined(__x86_64__) +#include "codeflinger/x86/GGLX86Assembler.h" +#else #include "codeflinger/GGLAssembler.h" +#endif #include "codeflinger/CodeCache.h" #include <stdio.h> diff --git a/libpixelflinger/scanline.cpp b/libpixelflinger/scanline.cpp index 3d14531..5ef932b 100644 --- a/libpixelflinger/scanline.cpp +++ b/libpixelflinger/scanline.cpp @@ -34,7 +34,12 @@ #include "scanline.h" #include "codeflinger/CodeCache.h" +#if defined(__i386__) || defined(__x86_64__) +#include "codeflinger/x86/GGLX86Assembler.h" +#include "codeflinger/x86/X86Assembler.h" +#else #include "codeflinger/GGLAssembler.h" +#endif #if defined(__arm__) #include "codeflinger/ARMAssembler.h" #elif defined(__aarch64__) @@ -61,6 +66,8 @@ #if defined(__arm__) || (defined(__mips__) 
&& !defined(__LP64__) && __mips_isa_rev < 6) || defined(__aarch64__) # define ANDROID_ARM_CODEGEN 1 +#elif defined(__i386__) +# define ANDROID_IA32_CODEGEN 1 #else # define ANDROID_ARM_CODEGEN 0 #endif @@ -284,7 +291,7 @@ static const needs_filter_t fill16noblend = { // ---------------------------------------------------------------------------- -#if ANDROID_ARM_CODEGEN +#if ANDROID_ARM_CODEGEN || ANDROID_IA32_CODEGEN #if defined(__mips__) && !defined(__LP64__) && __mips_isa_rev < 6 static CodeCache gCodeCache(32 * 1024); @@ -316,7 +323,7 @@ void ggl_uninit_scanline(context_t* c) { if (c->state.buffers.coverage) free(c->state.buffers.coverage); -#if ANDROID_ARM_CODEGEN +#if ANDROID_ARM_CODEGEN || ANDROID_IA32_CODEGEN if (c->scanline_as) c->scanline_as->decStrong(c); #endif @@ -436,6 +443,39 @@ static void pick_scanline(context_t* c) c->scanline_as = assembly.get(); c->scanline_as->incStrong(c); // hold on to assembly c->scanline = (void(*)(context_t* c))assembly->base(); +#elif ANDROID_IA32_CODEGEN + const AssemblyKey<needs_t> key(c->state.needs); + sp<Assembly> assembly = gCodeCache.lookup(key); + if (assembly == 0) { + // create a new assembly region + sp<ScanlineAssembly> a = new ScanlineAssembly(c->state.needs, + ASSEMBLY_SCRATCH_SIZE); + // initialize our assembler + GGLX86Assembler assembler( a ); + // generate the scanline code for the given needs + int err = assembler.scanline(c->state.needs, c); + if (ggl_likely(!err)) { + // finally, cache this assembly + err = gCodeCache.cache(a->key(), a); + } + if (ggl_unlikely(err)) { + ALOGE("error generating or caching assembly. Reverting to NOP. 
cache_err: %d \n", err); + c->scanline = scanline_noop; + c->init_y = init_y_noop; + c->step_y = step_y__nop; + return; + } + assembly = a; + } + + // release the previous assembly + if (c->scanline_as) { + c->scanline_as->decStrong(c); + } + + c->scanline_as = assembly.get(); + c->scanline_as->incStrong(c); // hold on to assembly + c->scanline = (void(*)(context_t* c))assembly->base(); #else // ALOGW("using generic (slow) pixel-pipeline"); c->scanline = scanline; @@ -464,7 +504,7 @@ static void blend_factor(context_t* c, pixel_t* r, uint32_t factor, const pixel_t* src, const pixel_t* dst); static void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv); -#if ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED) +#if (ANDROID_ARM_CODEGEN || ANDROID_IA32_CODEGEN) && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED) // no need to compile the generic-pipeline, it can't be reached void scanline(context_t*) @@ -939,7 +979,7 @@ discard: } } -#endif // ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED) +#endif // (ANDROID_ARM_CODEGEN || ANDROID_IA32_CODEGEN) && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED) // ---------------------------------------------------------------------------- #if 0 diff --git a/libpixelflinger/tests/codegen/Android.mk b/libpixelflinger/tests/codegen/Android.mk index 2f9ca2f..045a68e 100644 --- a/libpixelflinger/tests/codegen/Android.mk +++ b/libpixelflinger/tests/codegen/Android.mk @@ -1,8 +1,13 @@ LOCAL_PATH:= $(call my-dir) include $(CLEAR_VARS) +ifeq ($(TARGET_ARCH),x86) LOCAL_SRC_FILES:= \ - codegen.cpp.arm + codegen.cpp +else +LOCAL_SRC_FILES:= \ + codegen.cpp.arm +endif LOCAL_SHARED_LIBRARIES := \ libcutils \ @@ -11,6 +16,10 @@ LOCAL_SHARED_LIBRARIES := \ LOCAL_C_INCLUDES := \ $(LOCAL_PATH)/../.. 
+ifeq ($(TARGET_ARCH),x86) +LOCAL_STATIC_LIBRARIES := libenc +endif + LOCAL_MODULE:= test-opengl-codegen LOCAL_MODULE_TAGS := tests diff --git a/libpixelflinger/tests/codegen/codegen.cpp b/libpixelflinger/tests/codegen/codegen.cpp index 148b6f4..f5c7136 100644 --- a/libpixelflinger/tests/codegen/codegen.cpp +++ b/libpixelflinger/tests/codegen/codegen.cpp @@ -7,15 +7,22 @@ #include "scanline.h" #include "codeflinger/CodeCache.h" +#if defined(__i386__) || defined(__x86_64__) +#include "codeflinger/x86/GGLX86Assembler.h" +#include "codeflinger/x86/X86Assembler.h" +#else #include "codeflinger/GGLAssembler.h" #include "codeflinger/ARMAssembler.h" #if defined(__mips__) && !defined(__LP64__) && __mips_isa_rev < 6 #include "codeflinger/MIPSAssembler.h" #endif #include "codeflinger/Arm64Assembler.h" +#endif #if defined(__arm__) || (defined(__mips__) && !defined(__LP64__) && __mips_isa_rev < 6) || defined(__aarch64__) # define ANDROID_ARM_CODEGEN 1 +#elif defined(__i386__) +# define ANDROID_IA32_CODEGEN 1 #else # define ANDROID_ARM_CODEGEN 0 #endif @@ -40,7 +47,6 @@ public: static void ggl_test_codegen(uint32_t n, uint32_t p, uint32_t t0, uint32_t t1) { -#if ANDROID_ARM_CODEGEN GGLContext* c; gglInit(&c); needs_t needs; @@ -48,6 +54,7 @@ static void ggl_test_codegen(uint32_t n, uint32_t p, uint32_t t0, uint32_t t1) needs.p = p; needs.t[0] = t0; needs.t[1] = t1; +#if ANDROID_ARM_CODEGEN sp<ScanlineAssembly> a(new ScanlineAssembly(needs, ASSEMBLY_SCRATCH_SIZE)); #if defined(__arm__) @@ -66,10 +73,15 @@ static void ggl_test_codegen(uint32_t n, uint32_t p, uint32_t t0, uint32_t t1) if (err != 0) { printf("error %08x (%s)\n", err, strerror(-err)); } - gglUninit(c); -#else - printf("This test runs only on ARM, Arm64 or MIPS\n"); +#elif ANDROID_IA32_CODEGEN + sp<ScanlineAssembly> a(new ScanlineAssembly(needs, ASSEMBLY_SCRATCH_SIZE)); + GGLX86Assembler assembler( a ); + int err = assembler.scanline(needs, (context_t*)c); + if (err != 0) { + printf("error %08x (%s)\n", err, 
strerror(-err)); + } #endif + gglUninit(c); } int main(int argc, char** argv) diff --git a/logcat/logcat.cpp b/logcat/logcat.cpp index 07d0a5c..47f0136 100644 --- a/logcat/logcat.cpp +++ b/logcat/logcat.cpp @@ -654,7 +654,7 @@ int main(int argc, char **argv) break; case 'f': - if ((tail_time == log_time::EPOCH) && (tail_lines != 0)) { + if ((tail_time == log_time::EPOCH) && (tail_lines == 0)) { tail_time = lastLogTime(optarg); } // redirect output to a file diff --git a/logcat/tests/logcat_test.cpp b/logcat/tests/logcat_test.cpp index de2db67..9455d87 100644 --- a/logcat/tests/logcat_test.cpp +++ b/logcat/tests/logcat_test.cpp @@ -15,10 +15,12 @@ */ #include <ctype.h> +#include <dirent.h> #include <signal.h> #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/types.h> #include <gtest/gtest.h> #include <log/log.h> @@ -284,7 +286,7 @@ TEST(logcat, get_size) { while (fgets(buffer, sizeof(buffer), fp)) { int size, consumed, max, payload; - char size_mult[2], consumed_mult[2]; + char size_mult[3], consumed_mult[3]; long full_size, full_consumed; size = consumed = max = payload = 0; @@ -489,12 +491,12 @@ TEST(logcat, logrotate) { static const char comm[] = "logcat -b radio -b events -b system -b main" " -d -f %s/log.txt -n 7 -r 1"; char command[sizeof(buf) + sizeof(comm)]; - sprintf(command, comm, buf); + snprintf(command, sizeof(command), comm, buf); int ret; EXPECT_FALSE((ret = system(command))); if (!ret) { - sprintf(command, "ls -s %s 2>/dev/null", buf); + snprintf(command, sizeof(command), "ls -s %s 2>/dev/null", buf); FILE *fp; EXPECT_TRUE(NULL != (fp = popen(command, "r"))); @@ -503,16 +505,12 @@ TEST(logcat, logrotate) { int count = 0; while (fgets(buffer, sizeof(buffer), fp)) { - static const char match_1[] = "4 log.txt"; - static const char match_2[] = "8 log.txt"; - static const char match_3[] = "12 log.txt"; - static const char match_4[] = "16 log.txt"; static const char total[] = "total "; + int num; + char c; - if (!strncmp(buffer, 
match_1, sizeof(match_1) - 1) - || !strncmp(buffer, match_2, sizeof(match_2) - 1) - || !strncmp(buffer, match_3, sizeof(match_3) - 1) - || !strncmp(buffer, match_4, sizeof(match_4) - 1)) { + if ((2 == sscanf(buffer, "%d log.tx%c", &num, &c)) && + (num <= 24)) { ++count; } else if (strncmp(buffer, total, sizeof(total) - 1)) { fprintf(stderr, "WARNING: Parse error: %s", buffer); @@ -522,7 +520,7 @@ TEST(logcat, logrotate) { EXPECT_TRUE(count == 7 || count == 8); } } - sprintf(command, "rm -rf %s", buf); + snprintf(command, sizeof(command), "rm -rf %s", buf); EXPECT_FALSE(system(command)); } @@ -534,12 +532,12 @@ TEST(logcat, logrotate_suffix) { static const char logcat_cmd[] = "logcat -b radio -b events -b system -b main" " -d -f %s/log.txt -n 10 -r 1"; char command[sizeof(tmp_out_dir) + sizeof(logcat_cmd)]; - sprintf(command, logcat_cmd, tmp_out_dir); + snprintf(command, sizeof(command), logcat_cmd, tmp_out_dir); int ret; EXPECT_FALSE((ret = system(command))); if (!ret) { - sprintf(command, "ls %s 2>/dev/null", tmp_out_dir); + snprintf(command, sizeof(command), "ls %s 2>/dev/null", tmp_out_dir); FILE *fp; EXPECT_TRUE(NULL != (fp = popen(command, "r"))); @@ -575,7 +573,113 @@ TEST(logcat, logrotate_suffix) { pclose(fp); EXPECT_EQ(11, log_file_count); } - sprintf(command, "rm -rf %s", tmp_out_dir); + snprintf(command, sizeof(command), "rm -rf %s", tmp_out_dir); + EXPECT_FALSE(system(command)); +} + +TEST(logcat, logrotate_continue) { + static const char tmp_out_dir_form[] = "/data/local/tmp/logcat.logrotate.XXXXXX"; + char tmp_out_dir[sizeof(tmp_out_dir_form)]; + ASSERT_TRUE(NULL != mkdtemp(strcpy(tmp_out_dir, tmp_out_dir_form))); + + static const char log_filename[] = "log.txt"; + static const char logcat_cmd[] = "logcat -b all -d -f %s/%s -n 256 -r 1024"; + static const char cleanup_cmd[] = "rm -rf %s"; + char command[sizeof(tmp_out_dir) + sizeof(logcat_cmd) + sizeof(log_filename)]; + snprintf(command, sizeof(command), logcat_cmd, tmp_out_dir, log_filename); + + int 
ret; + EXPECT_FALSE((ret = system(command))); + if (ret) { + snprintf(command, sizeof(command), cleanup_cmd, tmp_out_dir); + EXPECT_FALSE(system(command)); + return; + } + FILE *fp; + snprintf(command, sizeof(command), "%s/%s", tmp_out_dir, log_filename); + EXPECT_TRUE(NULL != ((fp = fopen(command, "r")))); + if (!fp) { + snprintf(command, sizeof(command), cleanup_cmd, tmp_out_dir); + EXPECT_FALSE(system(command)); + return; + } + char *line = NULL; + char *last_line = NULL; // this line is allowed to stutter, one-line overlap + char *second_last_line = NULL; + size_t len = 0; + while (getline(&line, &len, fp) != -1) { + free(second_last_line); + second_last_line = last_line; + last_line = line; + line = NULL; + } + fclose(fp); + free(line); + if (second_last_line == NULL) { + fprintf(stderr, "No second to last line, using last, test may fail\n"); + second_last_line = last_line; + last_line = NULL; + } + free(last_line); + EXPECT_TRUE(NULL != second_last_line); + if (!second_last_line) { + snprintf(command, sizeof(command), cleanup_cmd, tmp_out_dir); + EXPECT_FALSE(system(command)); + return; + } + // re-run the command, it should only add a few lines more content if it + // continues where it left off. 
+ snprintf(command, sizeof(command), logcat_cmd, tmp_out_dir, log_filename); + EXPECT_FALSE((ret = system(command))); + if (ret) { + snprintf(command, sizeof(command), cleanup_cmd, tmp_out_dir); + EXPECT_FALSE(system(command)); + return; + } + DIR *dir; + EXPECT_TRUE(NULL != (dir = opendir(tmp_out_dir))); + if (!dir) { + snprintf(command, sizeof(command), cleanup_cmd, tmp_out_dir); + EXPECT_FALSE(system(command)); + return; + } + struct dirent *entry; + unsigned count = 0; + while ((entry = readdir(dir))) { + if (strncmp(entry->d_name, log_filename, sizeof(log_filename) - 1)) { + continue; + } + snprintf(command, sizeof(command), "%s/%s", tmp_out_dir, entry->d_name); + EXPECT_TRUE(NULL != ((fp = fopen(command, "r")))); + if (!fp) { + fprintf(stderr, "%s ?\n", command); + continue; + } + line = NULL; + size_t number = 0; + while (getline(&line, &len, fp) != -1) { + ++number; + if (!strcmp(line, second_last_line)) { + EXPECT_TRUE(++count <= 1); + fprintf(stderr, "%s(%zu):\n", entry->d_name, number); + } + } + fclose(fp); + free(line); + unlink(command); + } + closedir(dir); + if (count > 1) { + char *brk = strpbrk(second_last_line, "\r\n"); + if (!brk) { + brk = second_last_line + strlen(second_last_line); + } + fprintf(stderr, "\"%.*s\" occured %u times\n", + (int)(brk - second_last_line), second_last_line, count); + } + free(second_last_line); + + snprintf(command, sizeof(command), cleanup_cmd, tmp_out_dir); EXPECT_FALSE(system(command)); } diff --git a/rootdir/Android.mk b/rootdir/Android.mk index 7ab76b8..836e585 100644 --- a/rootdir/Android.mk +++ b/rootdir/Android.mk @@ -30,16 +30,21 @@ LOCAL_POST_INSTALL_CMD := mkdir -p $(addprefix $(TARGET_ROOT_OUT)/, \ include $(BUILD_SYSTEM)/base_rules.mk -# Regenerate init.environ.rc if PRODUCT_BOOTCLASSPATH has changed. -bcp_md5 := $(word 1, $(shell echo $(PRODUCT_BOOTCLASSPATH) $(PRODUCT_SYSTEM_SERVER_CLASSPATH) | $(MD5SUM))) +# Regenerate init.environ.rc if PRODUCT_BOOTCLASSPATH or TARGET_LDPRELOAD has changed. 
+bcp_md5 := $(word 1, $(shell echo $(PRODUCT_BOOTCLASSPATH) $(PRODUCT_SYSTEM_SERVER_CLASSPATH) $(TARGET_LDPRELOAD) | $(MD5SUM))) bcp_dep := $(intermediates)/$(bcp_md5).bcp.dep $(bcp_dep) : $(hide) mkdir -p $(dir $@) && rm -rf $(dir $@)*.bcp.dep && touch $@ +ifneq ($(strip $(TARGET_LDPRELOAD)),) + TARGET_LDPRELOAD_STR := :$(TARGET_LDPRELOAD) +endif + $(LOCAL_BUILT_MODULE): $(LOCAL_PATH)/init.environ.rc.in $(bcp_dep) @echo "Generate: $< -> $@" @mkdir -p $(dir $@) - $(hide) sed -e 's?%BOOTCLASSPATH%?$(PRODUCT_BOOTCLASSPATH)?g' $< >$@ + $(hide) sed -e 's?%BOOTCLASSPATH%?$(PRODUCT_BOOTCLASSPATH)?g'\ + -e 's?%TARGET_LDPRELOAD%?$(TARGET_LDPRELOAD_STR)?g' $< >$@ $(hide) sed -i -e 's?%SYSTEMSERVERCLASSPATH%?$(PRODUCT_SYSTEM_SERVER_CLASSPATH)?g' $@ bcp_md5 := diff --git a/rootdir/init.environ.rc.in b/rootdir/init.environ.rc.in index b34ea01..46ec1fb 100644 --- a/rootdir/init.environ.rc.in +++ b/rootdir/init.environ.rc.in @@ -9,3 +9,4 @@ on init export ASEC_MOUNTPOINT /mnt/asec export BOOTCLASSPATH %BOOTCLASSPATH% export SYSTEMSERVERCLASSPATH %SYSTEMSERVERCLASSPATH% + export LD_PRELOAD libsigchain.so%TARGET_LDPRELOAD% diff --git a/rootdir/init.rc b/rootdir/init.rc index 5c6b606..78adacc 100644 --- a/rootdir/init.rc +++ b/rootdir/init.rc @@ -21,6 +21,8 @@ on early-init # Set the security context of /adb_keys if present. 
restorecon /adb_keys + mount debugfs /sys/kernel/debug /sys/kernel/debug mode=755 + start ueventd on init @@ -169,13 +171,19 @@ on init chown system system /dev/cpuset/foreground chown system system /dev/cpuset/foreground/boost chown system system /dev/cpuset/background + chown system system /dev/cpuset/system-background chown system system /dev/cpuset/tasks chown system system /dev/cpuset/foreground/tasks chown system system /dev/cpuset/foreground/boost/tasks chown system system /dev/cpuset/background/tasks + chown system system /dev/cpuset/system-background/tasks + + # set system-background to 0775 so SurfaceFlinger can touch it + chmod 0775 /dev/cpuset/system-background chmod 0664 /dev/cpuset/foreground/tasks chmod 0664 /dev/cpuset/foreground/boost/tasks chmod 0664 /dev/cpuset/background/tasks + chmod 0664 /dev/cpuset/system-background/tasks chmod 0664 /dev/cpuset/tasks @@ -653,7 +661,6 @@ service surfaceflinger /system/bin/surfaceflinger user system group graphics drmrpc onrestart restart zygote - writepid /dev/cpuset/system-background/tasks service drm /system/bin/drmserver class main @@ -759,10 +766,10 @@ on property:persist.logd.logpersistd=logcatd # all exec/services are called with umask(077), so no gain beyond 0700 mkdir /data/misc/logd 0700 logd log # logd for write to /data/misc/logd, log group for read from pstore (-L) - exec - logd log -- /system/bin/logcat -L -b all -v threadtime -v usec -v printable -D -f /data/misc/logd/logcat -r 64 -n 256 + exec - logd log -- /system/bin/logcat -L -b all -v threadtime -v usec -v printable -D -f /data/misc/logd/logcat -r 1024 -n 256 start logcatd -service logcatd /system/bin/logcat -b all -v threadtime -v usec -v printable -D -f /data/misc/logd/logcat -r 64 -n 256 +service logcatd /system/bin/logcat -b all -v threadtime -v usec -v printable -D -f /data/misc/logd/logcat -r 1024 -n 256 class late_start disabled # logd for write to /data/misc/logd, log group for read from log daemon diff --git a/rootdir/init.trace.rc 
b/rootdir/init.trace.rc index 50944e6..4933156 100644 --- a/rootdir/init.trace.rc +++ b/rootdir/init.trace.rc @@ -12,6 +12,7 @@ on boot chown root shell /sys/kernel/debug/tracing/options/print-tgid chown root shell /sys/kernel/debug/tracing/events/sched/sched_switch/enable chown root shell /sys/kernel/debug/tracing/events/sched/sched_wakeup/enable + chown root shell /sys/kernel/debug/tracing/events/sched/sched_blocked_reason/enable chown root shell /sys/kernel/debug/tracing/events/power/cpu_frequency/enable chown root shell /sys/kernel/debug/tracing/events/power/cpu_idle/enable chown root shell /sys/kernel/debug/tracing/events/power/clock_set_rate/enable @@ -24,6 +25,7 @@ on boot chmod 0664 /sys/kernel/debug/tracing/options/print-tgid chmod 0664 /sys/kernel/debug/tracing/events/sched/sched_switch/enable chmod 0664 /sys/kernel/debug/tracing/events/sched/sched_wakeup/enable + chmod 0664 /sys/kernel/debug/tracing/events/sched/sched_blocked_reason/enable chmod 0664 /sys/kernel/debug/tracing/events/power/cpu_frequency/enable chmod 0664 /sys/kernel/debug/tracing/events/power/cpu_idle/enable chmod 0664 /sys/kernel/debug/tracing/events/power/clock_set_rate/enable diff --git a/sdcard/sdcard.c b/sdcard/sdcard.c index 13009aa..33b1509 100644 --- a/sdcard/sdcard.c +++ b/sdcard/sdcard.c @@ -507,6 +507,16 @@ static void derive_permissions_locked(struct fuse* fuse, struct node *parent, } } +static void derive_permissions_recursive_locked(struct fuse* fuse, struct node *parent) { + struct node *node; + for (node = parent->child; node; node = node->next) { + derive_permissions_locked(fuse, parent, node); + if (node->child) { + derive_permissions_recursive_locked(fuse, node); + } + } +} + /* Kernel has already enforced everything we returned through * derive_permissions_locked(), so this is used to lock down access * even further, such as enforcing that apps hold sdcard_rw. 
*/ @@ -1145,6 +1155,8 @@ static int handle_rename(struct fuse* fuse, struct fuse_handler* handler, res = rename_node_locked(child_node, new_name, new_actual_name); if (!res) { remove_node_from_parent_locked(child_node); + derive_permissions_locked(fuse, new_parent_node, child_node); + derive_permissions_recursive_locked(fuse, child_node); add_node_to_parent_locked(child_node, new_parent_node); } goto done; @@ -1203,11 +1215,11 @@ static int handle_open(struct fuse* fuse, struct fuse_handler* handler, out.fh = ptr_to_id(h); out.open_flags = 0; - #ifdef FUSE_SHORTCIRCUIT - out.lower_fd = h->fd; - #else - out.padding = 0; - #endif +#if defined(FUSE_STACKED_IO) || defined(FUSE_SHORTCIRCUIT) + out.lower_fd = h->fd; +#else + out.padding = 0; +#endif fuse_reply(fuse, hdr->unique, &out, sizeof(out)); return NO_STATUS; @@ -1373,11 +1385,11 @@ static int handle_opendir(struct fuse* fuse, struct fuse_handler* handler, out.fh = ptr_to_id(h); out.open_flags = 0; - #ifdef FUSE_SHORTCIRCUIT - out.lower_fd = -1; - #else - out.padding = 0; - #endif +#if defined(FUSE_STACKED_IO) || defined(FUSE_SHORTCIRCUIT) + out.lower_fd = -1; +#else + out.padding = 0; +#endif fuse_reply(fuse, hdr->unique, &out, sizeof(out)); return NO_STATUS; @@ -1461,9 +1473,12 @@ static int handle_init(struct fuse* fuse, struct fuse_handler* handler, out.max_readahead = req->max_readahead; out.flags = FUSE_ATOMIC_O_TRUNC | FUSE_BIG_WRITES; - #ifdef FUSE_SHORTCIRCUIT - out.flags |= FUSE_SHORTCIRCUIT; - #endif +#ifdef FUSE_SHORTCIRCUIT + out.flags |= FUSE_SHORTCIRCUIT; +#endif +#ifdef FUSE_STACKED_IO + out.flags |= FUSE_STACKED_IO; +#endif out.max_background = 32; out.congestion_threshold = 32; @@ -1680,6 +1695,10 @@ static int read_package_list(struct fuse_global* global) { TRACE("read_package_list: found %zu packages\n", hashmapSize(global->package_to_appid)); fclose(file); + + /* Regenerate ownership details using newly loaded mapping */ + derive_permissions_recursive_locked(global->fuse_default, 
&global->root); + pthread_mutex_unlock(&global->lock); return 0; } |