mirror of
				https://github.com/netwide-assembler/nasm.git
				synced 2025-10-10 00:25:06 -04:00 
			
		
		
		
	Add (untested!) SSSE3, SSE4.1, SSE4.2 instructions
Add the SSSE3, SSE4.1 and SSE4.2 instruction sets. Change \332 to be a literal 0xF2 prefix, by analogy with \333 for the 0xF3 prefix (the previous \332 flag is changed to \335). This is necessary to get the REX prefix in the right place for instructions that use it. We are going to have to go in and change existing instruction patterns which use these, as well.
This commit is contained in:
		
							
								
								
									
										17
									
								
								assemble.c
									
									
									
									
									
								
							
							
						
						
									
										17
									
								
								assemble.c
									
									
									
									
									
								
							| @@ -64,10 +64,10 @@ | |||||||
|  *                 to the condition code value of the instruction. |  *                 to the condition code value of the instruction. | ||||||
|  * \331          - instruction not valid with REP prefix.  Hint for |  * \331          - instruction not valid with REP prefix.  Hint for | ||||||
|  *                 disassembler only; for SSE instructions. |  *                 disassembler only; for SSE instructions. | ||||||
|  * \332          - disassemble a rep (0xF3 byte) prefix as repe not rep. |  * \332          - REP prefix (0xF2 byte) used as opcode extension. | ||||||
|  * \333          - REP prefix (0xF3 byte); for SSE instructions.  Not encoded |  * \333          - REP prefix (0xF3 byte) used as opcode extension. | ||||||
|  *                 as a literal byte in order to aid the disassembler. |  | ||||||
|  * \334          - LOCK prefix used instead of REX.R |  * \334          - LOCK prefix used instead of REX.R | ||||||
|  |  * \335          - disassemble a rep (0xF3 byte) prefix as repe not rep. | ||||||
|  * \340          - reserve <operand 0> bytes of uninitialized storage. |  * \340          - reserve <operand 0> bytes of uninitialized storage. | ||||||
|  *                 Operand 0 had better be a segmentless constant. |  *                 Operand 0 had better be a segmentless constant. | ||||||
|  * \364          - operand-size prefix (0x66) not permitted |  * \364          - operand-size prefix (0x66) not permitted | ||||||
| @@ -862,8 +862,8 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, | |||||||
|             codes++, length++; |             codes++, length++; | ||||||
|             break; |             break; | ||||||
|         case 0331: |         case 0331: | ||||||
|         case 0332: |  | ||||||
|             break; |             break; | ||||||
|  |         case 0332: | ||||||
|         case 0333: |         case 0333: | ||||||
|             length++; |             length++; | ||||||
|             break; |             break; | ||||||
| @@ -871,6 +871,8 @@ static int32_t calcsize(int32_t segment, int32_t offset, int bits, | |||||||
| 	    assert_no_prefix(ins, P_LOCK); | 	    assert_no_prefix(ins, P_LOCK); | ||||||
| 	    ins->rex |= REX_L; | 	    ins->rex |= REX_L; | ||||||
| 	    break; | 	    break; | ||||||
|  |         case 0335: | ||||||
|  | 	    break; | ||||||
|         case 0340: |         case 0340: | ||||||
|         case 0341: |         case 0341: | ||||||
|         case 0342: |         case 0342: | ||||||
| @@ -1360,11 +1362,11 @@ static void gencode(int32_t segment, int32_t offset, int bits, | |||||||
|             break; |             break; | ||||||
|  |  | ||||||
|         case 0331: |         case 0331: | ||||||
|         case 0332: |  | ||||||
|             break; |             break; | ||||||
|  |  | ||||||
|  | 	case 0332: | ||||||
|         case 0333: |         case 0333: | ||||||
|             *bytes = 0xF3; |             *bytes = c - 0332 + 0xF2; | ||||||
|             out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG); |             out(offset, segment, bytes, OUT_RAWDATA + 1, NO_SEG, NO_SEG); | ||||||
|             offset += 1; |             offset += 1; | ||||||
|             break; |             break; | ||||||
| @@ -1378,6 +1380,9 @@ static void gencode(int32_t segment, int32_t offset, int bits, | |||||||
|             ins->rex &= ~(REX_L|REX_R); |             ins->rex &= ~(REX_L|REX_R); | ||||||
|             break; |             break; | ||||||
|  |  | ||||||
|  |         case 0335: | ||||||
|  | 	    break; | ||||||
|  |  | ||||||
|         case 0340: |         case 0340: | ||||||
|         case 0341: |         case 0341: | ||||||
|         case 0342: |         case 0342: | ||||||
|   | |||||||
							
								
								
									
										7
									
								
								disasm.c
									
									
									
									
									
								
							
							
						
						
									
										7
									
								
								disasm.c
									
									
									
									
									
								
							| @@ -574,8 +574,8 @@ static int matches(const struct itemplate *t, uint8_t *data, | |||||||
|             if (prefix->rep) |             if (prefix->rep) | ||||||
|                 return FALSE; |                 return FALSE; | ||||||
| 	} else if (c == 0332) { | 	} else if (c == 0332) { | ||||||
|             if (drep == P_REP) | 	    if (prefix->rep != 0xF2) | ||||||
|                 drep = P_REPE; | 		return FALSE; | ||||||
|         } else if (c == 0333) { |         } else if (c == 0333) { | ||||||
|             if (prefix->rep != 0xF3) |             if (prefix->rep != 0xF3) | ||||||
|                 return FALSE; |                 return FALSE; | ||||||
| @@ -585,6 +585,9 @@ static int matches(const struct itemplate *t, uint8_t *data, | |||||||
| 		ins->rex |= REX_R; | 		ins->rex |= REX_R; | ||||||
| 		lock = 0; | 		lock = 0; | ||||||
| 	    } | 	    } | ||||||
|  |         } else if (c == 0335) { | ||||||
|  |             if (drep == P_REP) | ||||||
|  |                 drep = P_REPE; | ||||||
| 	} else if (c == 0364) { | 	} else if (c == 0364) { | ||||||
| 	    if (prefix->osp) | 	    if (prefix->osp) | ||||||
| 		return FALSE; | 		return FALSE; | ||||||
|   | |||||||
							
								
								
									
										88
									
								
								insns.dat
									
									
									
									
									
								
							
							
						
						
									
										88
									
								
								insns.dat
									
									
									
									
									
								
							| @@ -244,10 +244,10 @@ CMP       rm64,imm            \324\300\144\1\x81\207\141    X64,SM | |||||||
| CMP       mem,imm8            \300\1\x80\207\21             8086,SM | CMP       mem,imm8            \300\1\x80\207\21             8086,SM | ||||||
| CMP       mem,imm16           \320\300\134\1\x81\207\131    8086,SM | CMP       mem,imm16           \320\300\134\1\x81\207\131    8086,SM | ||||||
| CMP       mem,imm32           \321\300\144\1\x81\207\141    386,SM | CMP       mem,imm32           \321\300\144\1\x81\207\141    386,SM | ||||||
| CMPSB     void                \332\1\xA6                    8086 | CMPSB     void                \335\1\xA6                    8086 | ||||||
| CMPSD     void                \332\321\1\xA7                386 | CMPSD     void                \335\321\1\xA7                386 | ||||||
| CMPSQ     void                \332\324\1\xA7                X64 | CMPSQ     void                \335\324\1\xA7                X64 | ||||||
| CMPSW     void                \332\320\1\xA7                8086 | CMPSW     void                \335\320\1\xA7                8086 | ||||||
| CMPXCHG   mem,reg8            \300\2\x0F\xB0\101            PENT,SM | CMPXCHG   mem,reg8            \300\2\x0F\xB0\101            PENT,SM | ||||||
| CMPXCHG   reg8,reg8           \2\x0F\xB0\101                PENT | CMPXCHG   reg8,reg8           \2\x0F\xB0\101                PENT | ||||||
| CMPXCHG   mem,reg16           \320\300\2\x0F\xB1\101        PENT,SM | CMPXCHG   mem,reg16           \320\300\2\x0F\xB1\101        PENT,SM | ||||||
| @@ -1127,10 +1127,10 @@ SBB       rm64,imm            \324\300\144\1\x81\203\141    X64,SM | |||||||
| SBB       mem,imm8            \300\1\x80\203\21             8086,SM | SBB       mem,imm8            \300\1\x80\203\21             8086,SM | ||||||
| SBB       mem,imm16           \320\300\134\1\x81\203\131    8086,SM | SBB       mem,imm16           \320\300\134\1\x81\203\131    8086,SM | ||||||
| SBB       mem,imm32           \321\300\144\1\x81\203\141    386,SM | SBB       mem,imm32           \321\300\144\1\x81\203\141    386,SM | ||||||
| SCASB     void                \332\1\xAE                    8086 | SCASB     void                \335\1\xAE                    8086 | ||||||
| SCASD     void                \332\321\1\xAF                386 | SCASD     void                \335\321\1\xAF                386 | ||||||
| SCASQ     void                \332\324\1\xAF                X64 | SCASQ     void                \335\324\1\xAF                X64 | ||||||
| SCASW     void                \332\320\1\xAF                8086 | SCASW     void                \335\320\1\xAF                8086 | ||||||
| SFENCE    void                \3\x0F\xAE\xF8                X64,AMD | SFENCE    void                \3\x0F\xAE\xF8                X64,AMD | ||||||
| SGDT      mem                 \300\2\x0F\x01\200            286 | SGDT      mem                 \300\2\x0F\x01\200            286 | ||||||
| SHL       rm8,unity           \300\1\xD0\204                8086 | SHL       rm8,unity           \300\1\xD0\204                8086 | ||||||
| @@ -1948,3 +1948,75 @@ PSIGNW		mmxreg,mmxrm		\364\3\x0F\x38\x09\110		SSSE3,MMX,SQ | |||||||
| PSIGNW		xmmreg,xmmrm		\366\3\x0F\x38\x09\110		SSSE3 | PSIGNW		xmmreg,xmmrm		\366\3\x0F\x38\x09\110		SSSE3 | ||||||
| PSIGND		mmxreg,mmxrm		\364\3\x0F\x38\x0A\110		SSSE3,MMX,SQ | PSIGND		mmxreg,mmxrm		\364\3\x0F\x38\x0A\110		SSSE3,MMX,SQ | ||||||
| PSIGND		xmmreg,xmmrm		\366\3\x0F\x38\x0A\110		SSSE3 | PSIGND		xmmreg,xmmrm		\366\3\x0F\x38\x0A\110		SSSE3 | ||||||
|  |  | ||||||
|  | ; Penryn New Instructions (SSE4.1) | ||||||
|  | BLENDPD		xmmreg,xmmrm,imm	\366\3\x0F\x3A\x0D\110\26	SSE41 | ||||||
|  | BLENDPS		xmmreg,xmmrm,imm	\366\3\x0F\x3A\x0C\110\26	SSE41 | ||||||
|  | BLENDVPD	xmmreg,xmmrm,xmm0	\366\3\x0F\x38\x15\110		SSE41 | ||||||
|  | BLENDVPS	xmmreg,xmmrm,xmm0	\366\3\x0F\x38\x14\110		SSE41 | ||||||
|  | DPPD		xmmreg,xmmrm,imm	\366\3\x0F\x3A\x41\110\26	SSE41 | ||||||
|  | DPPS		xmmreg,xmmrm,imm	\366\3\x0F\x3A\x40\110\26	SSE41 | ||||||
|  | EXTRACTPS	rm32,xmmreg,imm		\366\3\x0F\x3A\x17\101\26	SSE41 | ||||||
|  | EXTRACTPS	reg64,xmmreg,imm	\324\366\3\x0F\x3A\x17\101\26	SSE41,X64 | ||||||
|  | INSERTPS	xmmreg,xmmrm,imm	\366\3\x0F\x3A\x21\110\26	SSE41,SD | ||||||
|  | MOVNTDQA	xmmreg,mem		\366\3\x0F\x38\x2A\110		SSE41 | ||||||
|  | MPSADBW		xmmreg,xmmrm,imm	\366\3\x0F\x3A\x42\110\26	SSE41 | ||||||
|  | PACKUSDW	xmmreg,xmmrm		\366\3\x0F\x38\x2B\110		SSE41 | ||||||
|  | PBLENDVB	xmmreg,xmmrm,xmm0	\366\3\x0F\x38\x10\110		SSE41 | ||||||
|  | PBLENDW		xmmreg,xmmrm,imm	\366\3\x0F\x3A\x0E\110\26	SSE41 | ||||||
|  | PCMPEQQ		xmmreg,xmmrm		\366\3\x0F\x38\x29\110		SSE41 | ||||||
|  | PEXTRB		reg32,xmmreg,imm	\366\3\x0F\x3A\x14\101\26	SSE41 | ||||||
|  | PEXTRB		mem8,xmmreg,imm		\366\3\x0F\x3A\x14\101\26	SSE41 | ||||||
|  | PEXTRB		reg64,xmmreg,imm	\324\366\3\x0F\x3A\x14\101\26	SSE41,X64 | ||||||
|  | PEXTRD		rm32,xmmreg,imm		\366\3\x0F\x3A\x16\101\26	SSE41 | ||||||
|  | PEXTRQ		rm64,xmmreg,imm		\366\3\x0F\x3A\x16\101\26	SSE41,X64 | ||||||
|  | PEXTRW		reg32,xmmreg,imm	\366\3\x0F\x3A\x15\101\26	SSE41 | ||||||
|  | PEXTRW		mem16,xmmreg,imm	\366\3\x0F\x3A\x15\101\26	SSE41 | ||||||
|  | PEXTRW		reg64,xmmreg,imm	\324\366\3\x0F\x3A\x15\101\26	SSE41,X64 | ||||||
|  | PHMINPOSUW	xmmreg,xmmrm		\366\3\x0F\x38\x41\110		SSE41 | ||||||
|  | PINSRB		xmmreg,reg32,imm	\366\3\x0F\x3A\x20\110\26	SSE41 | ||||||
|  | PINSRB		xmmreg,mem8,imm		\366\3\x0F\x3A\x20\110\26	SSE41 | ||||||
|  | PINSRD		xmmreg,rm32,imm		\366\3\x0F\x3A\x22\110\26	SSE41 | ||||||
|  | PINSRQ		xmmreg,rm64,imm		\324\366\3\x0F\x3A\x22\110\26	SSE41,X64 | ||||||
|  | PMAXSB		xmmreg,xmmrm		\366\3\x0F\x38\x3C\110		SSE41 | ||||||
|  | PMAXSD		xmmreg,xmmrm		\366\3\x0F\x38\x3D\110		SSE41 | ||||||
|  | PMAXUD		xmmreg,xmmrm		\366\3\x0F\x38\x3F\110		SSE41 | ||||||
|  | PMAXUW		xmmreg,xmmrm		\366\3\x0F\x38\x3E\110		SSE41 | ||||||
|  | PMINSB		xmmreg,xmmrm		\366\3\x0F\x38\x38\110		SSE41 | ||||||
|  | PMINSD		xmmreg,xmmrm		\366\3\x0F\x38\x39\110		SSE41 | ||||||
|  | PMINUD		xmmreg,xmmrm		\366\3\x0F\x38\x3B\110		SSE41 | ||||||
|  | PMINUW		xmmreg,xmmrm		\366\3\x0F\x38\x3A\110		SSE41 | ||||||
|  | PMOVSXBW	xmmreg,xmmrm		\366\3\x0F\x38\x20\110		SSE41,SQ | ||||||
|  | PMOVSXBD	xmmreg,xmmrm		\366\3\x0F\x38\x21\110		SSE41,SD | ||||||
|  | PMOVSXBQ	xmmreg,xmmrm		\366\3\x0F\x38\x22\110		SSE41,SW | ||||||
|  | PMOVSXWD	xmmreg,xmmrm		\366\3\x0F\x38\x23\110		SSE41,SQ | ||||||
|  | PMOVSXWQ	xmmreg,xmmrm		\366\3\x0F\x38\x24\110		SSE41,SD | ||||||
|  | PMOVSXDQ	xmmreg,xmmrm		\366\3\x0F\x38\x25\110		SSE41,SQ | ||||||
|  | PMOVZXBW	xmmreg,xmmrm		\366\3\x0F\x38\x30\110		SSE41,SQ | ||||||
|  | PMOVZXBD	xmmreg,xmmrm		\366\3\x0F\x38\x31\110		SSE41,SD | ||||||
|  | PMOVZXBQ	xmmreg,xmmrm		\366\3\x0F\x38\x32\110		SSE41,SW | ||||||
|  | PMOVZXWD	xmmreg,xmmrm		\366\3\x0F\x38\x33\110		SSE41,SQ | ||||||
|  | PMOVZXWQ	xmmreg,xmmrm		\366\3\x0F\x38\x34\110		SSE41,SD | ||||||
|  | PMOVZXDQ	xmmreg,xmmrm		\366\3\x0F\x38\x35\110		SSE41,SQ | ||||||
|  | PMULDQ		xmmreg,xmmrm		\366\3\x0F\x38\x28\110		SSE41 | ||||||
|  | PMULLD		xmmreg,xmmrm		\366\3\x0F\x38\x40\110		SSE41 | ||||||
|  | PTEST		xmmreg,xmmrm		\366\3\x0F\x38\x17\110		SSE41 | ||||||
|  | ROUNDPD		xmmreg,xmmrm,imm	\366\3\x0F\x3A\x09\110\26	SSE41 | ||||||
|  | ROUNDPS		xmmreg,xmmrm,imm	\366\3\x0F\x3A\x08\110\26	SSE41 | ||||||
|  | ROUNDSD		xmmreg,xmmrm,imm	\366\3\x0F\x3A\x0B\110\26	SSE41 | ||||||
|  | ROUNDSS		xmmreg,xmmrm,imm	\366\3\x0F\x3A\x0A\110\26	SSE41 | ||||||
|  |  | ||||||
|  | ; Nehalem New Instructions (SSE4.2) | ||||||
|  | CRC32		reg32,rm8		\332\3\x0F\x38\1\xF0\110	SSE42 | ||||||
|  | CRC32		reg32,rm16		\332\3\x0F\x38\1\xF1\110	SSE42 | ||||||
|  | CRC32		reg32,rm32		\332\3\x0F\x38\1\xF1\110	SSE42 | ||||||
|  | CRC32		reg64,rm8		\324\332\3\x0F\x38\1\xF0\110	SSE42,X64 | ||||||
|  | CRC32		reg64,rm64		\324\332\3\x0F\x38\1\xF1\110	SSE42,X64 | ||||||
|  | PCMPESTRI	xmmreg,xmmrm,imm	\366\3\x0F\x3A\x61\110\26	SSE42 | ||||||
|  | PCMPESTRM	xmmreg,xmmrm,imm	\366\3\x0F\x3A\x60\110\26	SSE42 | ||||||
|  | PCMPISTRI	xmmreg,xmmrm,imm	\366\3\x0F\x3A\x63\110\26	SSE42 | ||||||
|  | PCMPISTRM	xmmreg,xmmrm,imm	\366\3\x0F\x3A\x62\110\26	SSE42 | ||||||
|  | PCMPGTQ		xmmreg,xmmrm		\366\3\x0F\x38\x37\110		SSE42 | ||||||
|  | POPCNT		reg16,rm16		\320\333\2\x0F\xB8\110		NEHALEM | ||||||
|  | POPCNT		reg32,rm32		\321\333\2\x0F\xB8\110		NEHALEM | ||||||
|  | POPCNT		reg64,rm32		\324\333\2\x0F\xB8\110		NEHALEM,X64 | ||||||
|   | |||||||
							
								
								
									
										1
									
								
								insns.h
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								insns.h
									
									
									
									
									
								
							| @@ -103,6 +103,7 @@ extern const struct itemplate * const * const itable[]; | |||||||
| #define IF_WILLAMETTE 0x08000000UL      /* Willamette instructions */ | #define IF_WILLAMETTE 0x08000000UL      /* Willamette instructions */ | ||||||
| #define IF_PRESCOTT   0x09000000UL      /* Prescott instructions */ | #define IF_PRESCOTT   0x09000000UL      /* Prescott instructions */ | ||||||
| #define IF_X86_64 0x0A000000UL	/* x86-64 instruction (long or legacy mode) */ | #define IF_X86_64 0x0A000000UL	/* x86-64 instruction (long or legacy mode) */ | ||||||
|  | #define IF_NEHALEM 0x0B000000UL  /* Nehalem instruction */ | ||||||
| #define IF_X64	  (IF_LONG|IF_X86_64) | #define IF_X64	  (IF_LONG|IF_X86_64) | ||||||
| #define IF_IA64   0x0F000000UL  /* IA64 instructions (in x86 mode) */ | #define IF_IA64   0x0F000000UL  /* IA64 instructions (in x86 mode) */ | ||||||
| #define IF_CYRIX  0x10000000UL  /* Cyrix-specific instruction */ | #define IF_CYRIX  0x10000000UL  /* Cyrix-specific instruction */ | ||||||
|   | |||||||
							
								
								
									
										4
									
								
								nasm.h
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								nasm.h
									
									
									
									
									
								
							| @@ -422,6 +422,9 @@ enum { | |||||||
|  * With FPUREG: |  * With FPUREG: | ||||||
|  * 16: FPU0 |  * 16: FPU0 | ||||||
|  * |  * | ||||||
|  |  * With XMMREG: | ||||||
|  |  * 16: XMM0 | ||||||
|  |  * | ||||||
|  * With MEMORY: |  * With MEMORY: | ||||||
|  * 16: MEM_OFFS (this is a simple offset) |  * 16: MEM_OFFS (this is a simple offset) | ||||||
|  * 17: IP_REL (IP-relative offset) |  * 17: IP_REL (IP-relative offset) | ||||||
| @@ -485,6 +488,7 @@ enum { | |||||||
| #define MMXREG    	0x02009000L   /* MMX register */ | #define MMXREG    	0x02009000L   /* MMX register */ | ||||||
| #define RM_XMM		0x04008000L   /* XMM (SSE) operand */ | #define RM_XMM		0x04008000L   /* XMM (SSE) operand */ | ||||||
| #define XMMREG    	0x04009000L   /* XMM (SSE) register */ | #define XMMREG    	0x04009000L   /* XMM (SSE) register */ | ||||||
|  | #define XMM0		0x04019000L   /* XMM register zero */ | ||||||
| #define REG_CDT   	0x00101004L   /* CRn, DRn and TRn */ | #define REG_CDT   	0x00101004L   /* CRn, DRn and TRn */ | ||||||
| #define REG_CREG	0x00111004L   /* CRn */ | #define REG_CREG	0x00111004L   /* CRn */ | ||||||
| #define REG_DREG	0x00121004L   /* DRn */ | #define REG_DREG	0x00121004L   /* DRn */ | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user