| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| /** |
| * @author Alexander V. Astapchuk |
| */ |
| |
| /** |
| * @file |
| * @brief Main decoding (disassembling) routines implementation. |
| */ |
| |
| #include "dec_base.h" |
| #include "open/common.h" |
| |
| bool DecoderBase::is_prefix(const unsigned char * bytes) |
| { |
| unsigned char b0 = *bytes; |
| unsigned char b1 = *(bytes+1); |
| if (b0 == 0xF0) { // LOCK |
| return true; |
| } |
| if (b0==0xF2 || b0==0xF3) { // REPNZ/REPZ prefixes |
| if (b1 == 0x0F) { // .... but may be a part of SIMD opcode |
| return false; |
| } |
| return true; |
| } |
| if (b0 == 0x2E || b0 == 0x36 || b0==0x3E || b0==0x26 || b0==0x64 || b0==0x3E) { |
| // branch hints, segment prefixes |
| return true; |
| } |
| if (b0==0x66) { // operand-size prefix |
| if (b1 == 0x0F) { // .... but may be a part of SIMD opcode |
| return false; |
| } |
| return false; //XXX - currently considered as part of opcode//true; |
| } |
| if (b0==0x67) { // address size prefix |
| return true; |
| } |
| return false; |
| } |
| |
| // Returns prefix count from 0 to 4, or ((unsigned int)-1) on error |
| unsigned int DecoderBase::fill_prefs(const unsigned char * bytes, Inst * pinst) |
| { |
| const unsigned char * my_bytes = bytes; |
| |
| while( 1 ) |
| { |
| unsigned char by1 = *my_bytes; |
| unsigned char by2 = *(my_bytes + 1); |
| Inst::PrefGroups where; |
| |
| switch( by1 ) |
| { |
| case InstPrefix_REPNE: |
| case InstPrefix_REP: |
| { |
| if( 0x0F == by2) |
| { |
| return pinst->prefc; |
| } |
| } |
| case InstPrefix_LOCK: |
| { |
| where = Inst::Group1; |
| break; |
| } |
| case InstPrefix_CS: |
| case InstPrefix_SS: |
| case InstPrefix_DS: |
| case InstPrefix_ES: |
| case InstPrefix_FS: |
| case InstPrefix_GS: |
| // case InstPrefix_HintTaken: the same as CS override |
| // case InstPrefix_HintNotTaken: the same as DS override |
| { |
| where = Inst::Group2; |
| break; |
| } |
| case InstPrefix_OpndSize: |
| { |
| if( 0x0F == by2) |
| { |
| return pinst->prefc; |
| } |
| where = Inst::Group3; |
| break; |
| } |
| case InstPrefix_AddrSize: |
| { |
| where = Inst::Group4; |
| break; |
| } |
| default: |
| { |
| return pinst->prefc; |
| } |
| } |
| // Assertions are not allowed here. |
| // Error situations should result in returning error status |
| if (InstPrefix_Null != pinst->pref[where]) //only one prefix in each group |
| return (unsigned int)-1; |
| |
| pinst->pref[where] = (InstPrefix)by1; |
| |
| if (pinst->prefc >= 4) //no more than 4 prefixes |
| return (unsigned int)-1; |
| |
| pinst->prefc++; |
| ++my_bytes; |
| } |
| } |
| |
| |
| |
| unsigned DecoderBase::decode(const void * addr, Inst * pinst) |
| { |
| Inst tmp; |
| |
| //assert( *(unsigned char*)addr != 0x66); |
| |
| const unsigned char * bytes = (unsigned char*)addr; |
| |
| // Load up to 4 prefixes |
| // for each Mnemonic |
| unsigned int pref_count = fill_prefs(bytes, &tmp); |
| |
| if (pref_count == (unsigned int)-1) // Wrong prefix sequence, or >4 prefixes |
| return 0; // Error |
| |
| bytes += pref_count; |
| |
| // for each opcodedesc |
| // if (raw_len == 0) memcmp(, raw_len) |
| // else check the mixed state which is one of the following: |
| // /digit /i /rw /rd /rb |
| |
| bool found = false; |
| const unsigned char * saveBytes = bytes; |
| for (unsigned mn=1; mn<Mnemonic_Count; mn++) { |
| bytes = saveBytes; |
| found=try_mn((Mnemonic)mn, &bytes, &tmp); |
| if (found) { |
| tmp.mn = (Mnemonic)mn; |
| break; |
| } |
| } |
| if (!found) { |
| // Unknown opcode |
| return 0; |
| } |
| tmp.size = (unsigned)(bytes-(const unsigned char*)addr); |
| if (pinst) { |
| *pinst = tmp; |
| } |
| return tmp.size; |
| } |
| |
| #ifdef _EM64T_ |
| #define EXTEND_REG(reg, flag) \ |
| ((NULL == rex || 0 == rex->flag) ? reg : (reg + 8)) |
| #else |
| #define EXTEND_REG(reg, flag) (reg) |
| #endif |
| |
| bool DecoderBase::decode_aux(const EncoderBase::OpcodeDesc& odesc, unsigned aux, |
| const unsigned char ** pbuf, Inst * pinst, const Rex UNREF *rex) |
| { |
| OpcodeByteKind kind = (OpcodeByteKind)(aux & OpcodeByteKind_KindMask); |
| unsigned byte = (aux & OpcodeByteKind_OpcodeMask); |
| unsigned data_byte = **pbuf; |
| EncoderBase::Operand& opnd = pinst->operands[pinst->argc]; |
| const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc]; |
| |
| switch (kind) { |
| case OpcodeByteKind_SlashR: |
| //decodeModRM(odesc, pbuf, pinst); |
| { |
| const ModRM& modrm = *(ModRM*)*pbuf; |
| unsigned regIndex = odesc.opnds[0].kind == OpndKind_GPReg ? 0 : 1; |
| RegName reg = getRegName(OpndKind_GPReg, opndDesc.size, EXTEND_REG(modrm.reg, r)); |
| EncoderBase::Operand& regOpnd = pinst->operands[regIndex]; |
| if (regIndex == 0) { |
| regOpnd = EncoderBase::Operand(reg); |
| ++pinst->argc; |
| decodeModRM(odesc, pbuf, pinst, rex); |
| } |
| else { |
| decodeModRM(odesc, pbuf, pinst, rex); |
| ++pinst->argc; |
| regOpnd = EncoderBase::Operand(reg); |
| } |
| ++pinst->argc; |
| } |
| return true; |
| case OpcodeByteKind_rb: |
| case OpcodeByteKind_rw: |
| case OpcodeByteKind_rd: |
| { |
| // Gregory - |
| // Here we don't parse register because for current needs |
| // disassembler doesn't require to parse all operands |
| unsigned regid = data_byte - byte; |
| if (regid>7) { |
| return false; |
| } |
| OpndSize opnd_size; |
| switch(kind) |
| { |
| case OpcodeByteKind_rb: |
| { |
| opnd_size = OpndSize_8; |
| break; |
| } |
| case OpcodeByteKind_rw: |
| { |
| opnd_size = OpndSize_16; |
| break; |
| } |
| case OpcodeByteKind_rd: |
| { |
| opnd_size = OpndSize_32; |
| break; |
| } |
| default: |
| assert( false ); |
| } |
| opnd = EncoderBase::Operand( getRegName(OpndKind_GPReg, opnd_size, regid) ); |
| |
| ++pinst->argc; |
| ++*pbuf; |
| return true; |
| } |
| case OpcodeByteKind_cb: |
| { |
| char offset = *(char*)*pbuf; |
| *pbuf += 1; |
| opnd = EncoderBase::Operand(offset); |
| ++pinst->argc; |
| //pinst->direct_addr = (void*)(pinst->offset + *pbuf); |
| } |
| return true; |
| case OpcodeByteKind_cw: |
| // not an error, but not expected in current env |
| return false; |
| case OpcodeByteKind_cd: |
| { |
| int offset = *(int*)*pbuf; |
| *pbuf += 4; |
| opnd = EncoderBase::Operand(offset); |
| ++pinst->argc; |
| } |
| return true; |
| case OpcodeByteKind_SlashNum: |
| { |
| const ModRM& modrm = *(ModRM*)*pbuf; |
| if (modrm.reg != byte) { |
| return false; |
| } |
| decodeModRM(odesc, pbuf, pinst, rex); |
| ++pinst->argc; |
| } |
| return true; |
| case OpcodeByteKind_ib: |
| { |
| char ival = *(char*)*pbuf; |
| opnd = EncoderBase::Operand(ival); |
| ++pinst->argc; |
| *pbuf += 1; |
| } |
| return true; |
| case OpcodeByteKind_iw: |
| { |
| short ival = *(short*)*pbuf; |
| opnd = EncoderBase::Operand(ival); |
| ++pinst->argc; |
| *pbuf += 2; |
| } |
| return true; |
| case OpcodeByteKind_id: |
| { |
| int ival = *(int*)*pbuf; |
| opnd = EncoderBase::Operand(ival); |
| ++pinst->argc; |
| *pbuf += 4; |
| } |
| return true; |
| #ifdef _EM64T_ |
| case OpcodeByteKind_io: |
| { |
| long long int ival = *(long long int*)*pbuf; |
| opnd = EncoderBase::Operand(OpndSize_64, ival); |
| ++pinst->argc; |
| *pbuf += 8; |
| } |
| return true; |
| #endif |
| case OpcodeByteKind_plus_i: |
| { |
| unsigned regid = data_byte - byte; |
| if (regid>7) { |
| return false; |
| } |
| ++*pbuf; |
| return true; |
| } |
| case OpcodeByteKind_ZeroOpcodeByte: // cant be here |
| return false; |
| default: |
| // unknown kind ? how comes ? |
| break; |
| } |
| return false; |
| } |
| |
| bool DecoderBase::try_mn(Mnemonic mn, const unsigned char ** pbuf, Inst * pinst) { |
| const unsigned char * save_pbuf = *pbuf; |
| EncoderBase::OpcodeDesc * opcodes = EncoderBase::opcodes[mn]; |
| |
| for (unsigned i=0; !opcodes[i].last; i++) { |
| const EncoderBase::OpcodeDesc& odesc = opcodes[i]; |
| char *opcode_ptr = const_cast<char *>(odesc.opcode); |
| int opcode_len = odesc.opcode_len; |
| Rex *prex = NULL; |
| #ifdef _EM64T_ |
| Rex rex; |
| #endif |
| |
| *pbuf = save_pbuf; |
| #ifdef _EM64T_ |
| // Match REX prefixes |
| unsigned char rex_byte = (*pbuf)[0]; |
| if ((rex_byte & 0xf0) == 0x40) |
| { |
| if ((rex_byte & 0x08) != 0) |
| { |
| // Have REX.W |
| if (opcode_len > 0 && opcode_ptr[0] == 0x48) |
| { |
| // Have REX.W in opcode. All mnemonics that allow |
| // REX.W have to have specified it in opcode, |
| // otherwise it is not allowed |
| rex = *(Rex *)*pbuf; |
| prex = &rex; |
| (*pbuf)++; |
| opcode_ptr++; |
| opcode_len--; |
| } |
| } |
| else |
| { |
| // No REX.W, so it doesn't have to be in opcode. We |
| // have REX.B, REX.X, REX.R or their combination, but |
| // not in opcode, they may extend any part of the |
| // instruction |
| rex = *(Rex *)*pbuf; |
| prex = &rex; |
| (*pbuf)++; |
| } |
| } |
| #endif |
| if (opcode_len != 0) { |
| if (memcmp(*pbuf, opcode_ptr, opcode_len)) { |
| continue; |
| } |
| *pbuf += opcode_len; |
| } |
| if (odesc.aux0 != 0) { |
| |
| if (!decode_aux(odesc, odesc.aux0, pbuf, pinst, prex)) { |
| continue; |
| } |
| if (odesc.aux1 != 0) { |
| if (!decode_aux(odesc, odesc.aux1, pbuf, pinst, prex)) { |
| continue; |
| } |
| } |
| pinst->odesc = &opcodes[i]; |
| return true; |
| } |
| else { |
| // Can't have empty opcode |
| assert(opcode_len != 0); |
| pinst->odesc = &opcodes[i]; |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| bool DecoderBase::decodeModRM(const EncoderBase::OpcodeDesc& odesc, |
| const unsigned char ** pbuf, Inst * pinst, const Rex *rex) |
| { |
| EncoderBase::Operand& opnd = pinst->operands[pinst->argc]; |
| const EncoderBase::OpndDesc& opndDesc = odesc.opnds[pinst->argc]; |
| |
| //XXX debug ///assert(0x66 != *(*pbuf-2)); |
| const ModRM& modrm = *(ModRM*)*pbuf; |
| *pbuf += 1; |
| |
| RegName base = RegName_Null; |
| RegName index = RegName_Null; |
| int disp = 0; |
| unsigned scale = 0; |
| |
| // On x86_64 all mnemonics that allow REX.W have REX.W in opcode. |
| // Therefore REX.W is simply ignored, and opndDesc.size is used |
| |
| if (modrm.mod == 3) { |
| // we have only modrm. no sib, no disp. |
| RegName reg = getRegName(OpndKind_GPReg, opndDesc.size, EXTEND_REG(modrm.rm, b)); |
| opnd = EncoderBase::Operand(reg); |
| return true; |
| } |
| const SIB& sib = *(SIB*)*pbuf; |
| // check whether we have a sib |
| if (modrm.rm == 4) { |
| // yes, we have SIB |
| *pbuf += 1; |
| scale = sib.scale == 0 ? 0 : (1<<sib.scale); |
| if (sib.index != 4) { |
| index = getRegName(OpndKind_GPReg, opndDesc.size, EXTEND_REG(sib.index, x)); |
| } else { |
| // (sib.index == 4) => no index |
| } |
| |
| if (sib.base != 5 && modrm.mod != 0) { |
| base = getRegName(OpndKind_GPReg, opndDesc.size, EXTEND_REG(sib.base, b)); |
| } else { |
| // (sib.base == 5 && modrm.mod == 0) => no base |
| } |
| } |
| else { |
| if (modrm.mod != 0 || modrm.rm != 5) { |
| base = getRegName(OpndKind_GPReg, opndDesc.size, EXTEND_REG(modrm.rm, b)); |
| } |
| else { |
| // mod=0 && rm == 5 => only disp32 |
| } |
| } |
| |
| if (modrm.mod == 2) { |
| // have disp32 |
| disp = *(int*)*pbuf; |
| *pbuf += 4; |
| } |
| else if (modrm.mod == 1) { |
| // have disp8 |
| disp = *(char*)*pbuf; |
| *pbuf += 1; |
| } |
| else { |
| assert(modrm.mod == 0); |
| if (modrm.rm == 5) { |
| // have disp32 w/o sib |
| disp = *(int*)*pbuf; |
| *pbuf += 4; |
| } |
| else if (modrm.rm == 4 && sib.base == 5) { |
| // have disp32 with SI in sib |
| disp = *(int*)*pbuf; |
| *pbuf += 4; |
| } |
| } |
| opnd = EncoderBase::Operand(opndDesc.size, base, index, scale, disp); |
| return true; |
| } |
| |