diff --git a/gas/ChangeLog b/gas/ChangeLog
index 9b9d7d6980797ae8c9b8a1b73fb0d2159df6b496..39bc794404377ffc8a433b7a6bc34f391a619a27 100644
--- a/gas/ChangeLog
+++ b/gas/ChangeLog
@@ -1,3 +1,19 @@
+2004-11-23  Jan Beulich  <jbeulich@novell.com>
+
+	* config/tc-i386.h (CpuMMX2): Declare. Artificial classifier to
+	indicate the MMX extensions added by both SSE and 3DNow!A.
+	(Cpu3dnowA): Declare.
+	(CpuUnknownFlags): Update.
+	* config/tc-i386.c (cpu_sub_arch_name): Declare.
+	(cpu_arch): i586 and pentium do not imply MMX. i686 and pentiumpro
+	imply neither SSE nor MMX. k6 implies MMX. k6_2 additionally implies
+	3DNow!. Athlon additionally implies 3DNow!A. Add several new
+	entries (those starting with a dot are for sub-arch specification).
+	(set_cpu_arch): Handle sub-arch specifications.
+	(parse_insn): Distinguish between instructions not supported because
+	of insufficient CPU features and because of 64-bit mode.
+	* doc/c-i386.texi: Describe enhanced .arch directive.
+
 2004-11-22  Bob Wilson  <bob.wilson@acm.org>
 
 	* config/tc-xtensa.c (xg_add_opcode_fix): Set fx_no_overflow.
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index aa2ec54e0471fd457fdf9c9d415b5dc132b58881..430a3389165be0404ef975d0922c620aa6668cce 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -314,6 +314,7 @@ static int quiet_warnings = 0;
 
 /* CPU name.  */
 static const char *cpu_arch_name = NULL;
+static const char *cpu_sub_arch_name = NULL;
 
 /* CPU feature flags.  */
 static unsigned int cpu_arch_flags = CpuUnknownFlags | CpuNo64;
@@ -416,14 +417,24 @@ static const arch_entry cpu_arch[] = {
   {"i286",	Cpu086|Cpu186|Cpu286 },
   {"i386",	Cpu086|Cpu186|Cpu286|Cpu386 },
   {"i486",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486 },
-  {"i586",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuMMX },
-  {"i686",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX|CpuSSE },
-  {"pentium",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuMMX },
-  {"pentiumpro",Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX|CpuSSE },
-  {"pentium4",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX|CpuSSE|CpuSSE2 },
-  {"k6",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX|Cpu3dnow },
-  {"athlon",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon|CpuMMX|Cpu3dnow },
-  {"sledgehammer",Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon|CpuSledgehammer|CpuMMX|Cpu3dnow|CpuSSE|CpuSSE2 },
+  {"i586",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586 },
+  {"i686",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686 },
+  {"pentium",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586 },
+  {"pentiumpro",Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686 },
+  {"pentiumii",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX },
+  {"pentiumiii",Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuMMX|CpuMMX2|CpuSSE },
+  {"pentium4",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2 },
+  {"prescott",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuPNI },
+  {"k6",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX },
+  {"k6_2",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuK6|CpuMMX|Cpu3dnow },
+  {"athlon",	Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA },
+  {"sledgehammer",Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuK6|CpuAthlon|CpuSledgehammer|CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA|CpuSSE|CpuSSE2 },
+  {".mmx",	CpuMMX },
+  {".sse",	CpuMMX|CpuMMX2|CpuSSE },
+  {".sse2",	CpuMMX|CpuMMX2|CpuSSE|CpuSSE2 },
+  {".3dnow",	CpuMMX|Cpu3dnow },
+  {".3dnowa",	CpuMMX|CpuMMX2|Cpu3dnow|Cpu3dnowA },
+  {".padlock",	CpuPadLock },
   {NULL, 0 }
 };
 
@@ -836,10 +847,22 @@ set_cpu_arch (dummy)
 	{
 	  if (strcmp (string, cpu_arch[i].name) == 0)
 	    {
-	      cpu_arch_name = cpu_arch[i].name;
-	      cpu_arch_flags = (cpu_arch[i].flags
-				| (flag_code == CODE_64BIT ? Cpu64 : CpuNo64));
-	      break;
+	      if (*string != '.')
+		{
+		  cpu_arch_name = cpu_arch[i].name;
+		  cpu_sub_arch_name = NULL;
+		  cpu_arch_flags = (cpu_arch[i].flags
+				    | (flag_code == CODE_64BIT ? Cpu64 : CpuNo64));
+		  break;
+		}
+	      if ((cpu_arch_flags | cpu_arch[i].flags) != cpu_arch_flags)
+		{
+		  cpu_sub_arch_name = cpu_arch[i].name;
+		  cpu_arch_flags |= cpu_arch[i].flags;
+		}
+	      *input_line_pointer = e;
+	      demand_empty_rest_of_line ();
+	      return;
 	    }
 	}
       if (!cpu_arch[i].name)
@@ -1561,6 +1584,8 @@ parse_insn (line, mnemonic)
   char *l = line;
   char *token_start = l;
   char *mnem_p;
+  int supported;
+  const template *t;
 
   /* Non-zero if we found a prefix only acceptable with string insns.  */
   const char *expecting_string_instruction = NULL;
@@ -1709,11 +1734,29 @@ parse_insn (line, mnemonic)
     }
 
   /* Check if instruction is supported on specified architecture.  */
-  if ((current_templates->start->cpu_flags & ~(Cpu64 | CpuNo64))
-      & ~(cpu_arch_flags & ~(Cpu64 | CpuNo64)))
+  supported = 0;
+  for (t = current_templates->start; t < current_templates->end; ++t)
+    {
+      if (!((t->cpu_flags & ~(Cpu64 | CpuNo64))
+	    & ~(cpu_arch_flags & ~(Cpu64 | CpuNo64))))
+	  supported |= 1;
+      if (!(t->cpu_flags & (flag_code == CODE_64BIT ? CpuNo64 : Cpu64)))
+	  supported |= 2;
+    }
+  if (!(supported & 2))
+    {
+      as_bad (flag_code == CODE_64BIT
+	      ? _("`%s' is not supported in 64-bit mode")
+	      : _("`%s' is only supported in 64-bit mode"),
+	      current_templates->start->name);
+      return NULL;
+    }
+  if (!(supported & 1))
     {
-      as_warn (_("`%s' is not supported on `%s'"),
-	       current_templates->start->name, cpu_arch_name);
+      as_warn (_("`%s' is not supported on `%s%s'"),
+	       current_templates->start->name,
+	       cpu_arch_name,
+	       cpu_sub_arch_name ? cpu_sub_arch_name : "");
     }
   else if ((Cpu386 & ~cpu_arch_flags) && (flag_code != CODE_16BIT))
     {
diff --git a/gas/config/tc-i386.h b/gas/config/tc-i386.h
index 3b377cc39df0364209dc9eebc6fcec92288840aa..e56130c175860c5c7000e68bc4e917488a8130fe 100644
--- a/gas/config/tc-i386.h
+++ b/gas/config/tc-i386.h
@@ -174,18 +174,20 @@ typedef struct
 #define CpuAthlon	0x200	/* AMD Athlon or better required*/
 #define CpuSledgehammer 0x400	/* Sledgehammer or better required */
 #define CpuMMX		0x800	/* MMX support required */
-#define CpuSSE	       0x1000	/* Streaming SIMD extensions required */
-#define CpuSSE2	       0x2000	/* Streaming SIMD extensions 2 required */
-#define Cpu3dnow       0x4000	/* 3dnow! support required */
-#define CpuPNI	       0x8000	/* Prescott New Instructions required */
-#define CpuPadLock    0x10000	/* VIA PadLock required */
+#define CpuMMX2	       0x1000	/* extended MMX support (with SSE or 3DNow!Ext) required */
+#define CpuSSE	       0x2000	/* Streaming SIMD extensions required */
+#define CpuSSE2	       0x4000	/* Streaming SIMD extensions 2 required */
+#define Cpu3dnow       0x8000	/* 3dnow! support required */
+#define Cpu3dnowA     0x10000	/* 3dnow!Extensions support required */
+#define CpuPNI	      0x20000	/* Prescott New Instructions required */
+#define CpuPadLock    0x40000	/* VIA PadLock required */
 
   /* These flags are set by gas depending on the flag_code.  */
 #define Cpu64	     0x4000000   /* 64bit support required  */
 #define CpuNo64      0x8000000   /* Not supported in the 64bit mode  */
 
   /* The default value for unknown CPUs - enable all features to avoid problems.  */
-#define CpuUnknownFlags (Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuSledgehammer|CpuMMX|CpuSSE|CpuSSE2|CpuPNI|Cpu3dnow|CpuK6|CpuAthlon|CpuPadLock)
+#define CpuUnknownFlags (Cpu086|Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686|CpuP4|CpuSledgehammer|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuPNI|Cpu3dnow|Cpu3dnowA|CpuK6|CpuAthlon|CpuPadLock)
 
   /* the bits in opcode_modifier are used to generate the final opcode from
      the base_opcode.  These bits also are used to detect alternate forms of
diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi
index f9fc55b1218b033e8683057c3c376ef8eed52982..c561bcb350171bd7aa95f4229aa4903a6cf9d507 100644
--- a/gas/doc/c-i386.texi
+++ b/gas/doc/c-i386.texi
@@ -691,15 +691,16 @@ register is @samp{%st(i)}.
 @cindex x86-64 arch directive
 
 @code{@value{AS}} may be told to assemble for a particular CPU
-architecture with the @code{.arch @var{cpu_type}} directive.  This
+(sub-)architecture with the @code{.arch @var{cpu_type}} directive.  This
 directive enables a warning when gas detects an instruction that is not
 supported on the CPU specified.  The choices for @var{cpu_type} are:
 
 @multitable @columnfractions .20 .20 .20 .20
 @item @samp{i8086} @tab @samp{i186} @tab @samp{i286} @tab @samp{i386}
 @item @samp{i486} @tab @samp{i586} @tab @samp{i686} @tab @samp{pentium}
-@item @samp{pentiumpro} @tab @samp{pentium4} @tab @samp{k6} @tab @samp{athlon}
-@item @samp{sledgehammer}
+@item @samp{pentiumpro} @tab @samp{pentiumii} @tab @samp{pentiumiii} @tab @samp{pentium4}
+@item @samp{k6} @tab @samp{athlon} @tab @samp{sledgehammer}
+@item @samp{.mmx} @tab @samp{.sse} @tab @samp{.sse2} @tab @samp{.3dnow}
 @end multitable
 
 Apart from the warning, there are only two other effects on
@@ -715,13 +716,14 @@ conditional jumps will be promoted when necessary to a two instruction
 sequence consisting of a conditional jump of the opposite sense around
 an unconditional jump to the target.
 
-Following the CPU architecture, you may specify @samp{jumps} or
-@samp{nojumps} to control automatic promotion of conditional jumps.
-@samp{jumps} is the default, and enables jump promotion;  All external
-jumps will be of the long variety, and file-local jumps will be promoted
-as necessary.  (@pxref{i386-Jumps})  @samp{nojumps} leaves external
-conditional jumps as byte offset jumps, and warns about file-local
-conditional jumps that @code{@value{AS}} promotes.
+Following the CPU architecture (but not a sub-architecture, that is, a name
+starting with a dot), you may specify @samp{jumps} or @samp{nojumps} to
+control automatic promotion of conditional jumps.  @samp{jumps} is the
+default, and enables jump promotion: all external jumps will be of the long
+variety, and file-local jumps will be promoted as necessary.
+(@pxref{i386-Jumps})  @samp{nojumps} leaves external conditional jumps as
+byte offset jumps, and warns about file-local conditional jumps that
+@code{@value{AS}} promotes.
 Unconditional jumps are treated as for @samp{jumps}.
 
 For example
diff --git a/include/opcode/ChangeLog b/include/opcode/ChangeLog
index 3429c73e40041e1c4b4eb70173badcc93ef13718..14bd353ac3497da5343756d437c84a9a15f7b674 100644
--- a/include/opcode/ChangeLog
+++ b/include/opcode/ChangeLog
@@ -1,3 +1,11 @@
+2004-11-23  Jan Beulich  <jbeulich@novell.com>
+
+	* i386.h (i386_optab): paddq and psubq, even in their MMX form, are
+	available only with SSE2. Change the MMX additions introduced by SSE
+	and 3DNow!A to CpuMMX2 (rather than CpuMMX). Indicate the 3DNow!A
+	instructions by their now designated identifier (since combining i686
+	and 3DNow! does not really imply 3DNow!A).
+
 2004-11-19  Alan Modra  <amodra@bigpond.net.au>
 
 	* msp430.h (struct rcodes_s, MSP430_RLC, msp430_rcodes,
diff --git a/include/opcode/i386.h b/include/opcode/i386.h
index fc2f5f0b557c2bd9767cc1a35460886966feec53..1a88da44ca6cb9a78d1b62e7201771c0bfe2a4a0 100644
--- a/include/opcode/i386.h
+++ b/include/opcode/i386.h
@@ -1020,7 +1020,7 @@ static const template i386_optab[] =
 {"paddw",    2, 0x660ffd,X,CpuSSE2,NoSuf|IgnoreSize|Modrm,		{ RegXMM|LLongMem, RegXMM, 0 } },
 {"paddd",    2, 0x0ffe, X, CpuMMX, NoSuf|IgnoreSize|Modrm,		{ RegMMX|LongMem, RegMMX, 0 } },
 {"paddd",    2, 0x660ffe,X,CpuSSE2,NoSuf|IgnoreSize|Modrm,		{ RegXMM|LLongMem, RegXMM, 0 } },
-{"paddq",    2, 0x0fd4, X, CpuMMX, NoSuf|IgnoreSize|Modrm,		{ RegMMX|LLongMem, RegMMX, 0 } },
+{"paddq",    2, 0x0fd4, X, CpuSSE2,NoSuf|IgnoreSize|Modrm,		{ RegMMX|LLongMem, RegMMX, 0 } },
 {"paddq",    2, 0x660fd4,X,CpuSSE2,NoSuf|IgnoreSize|Modrm,		{ RegXMM|LLongMem, RegXMM, 0 } },
 {"paddsb",   2, 0x0fec, X, CpuMMX, NoSuf|IgnoreSize|Modrm,		{ RegMMX|LongMem, RegMMX, 0 } },
 {"paddsb",   2, 0x660fec,X,CpuSSE2,NoSuf|IgnoreSize|Modrm,		{ RegXMM|LLongMem, RegXMM, 0 } },
@@ -1151,7 +1151,7 @@ static const template i386_optab[] =
 {"divps",     2, 0x0f5e,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegXMM|LLongMem, RegXMM, 0 } },
 {"divss",     2, 0xf30f5e,  X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegXMM|WordMem, RegXMM, 0 } },
 {"ldmxcsr",   1, 0x0fae,    2, CpuSSE, NoSuf|IgnoreSize|Modrm, 	{ WordMem, 0, 0 } },
-{"maskmovq",  2, 0x0ff7,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegMMX|InvMem, RegMMX, 0 } },
+{"maskmovq",  2, 0x0ff7,    X, CpuMMX2,NoSuf|IgnoreSize|Modrm,	{ RegMMX|InvMem, RegMMX, 0 } },
 {"maxps",     2, 0x0f5f,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegXMM|LLongMem, RegXMM, 0 } },
 {"maxss",     2, 0xf30f5f,  X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegXMM|WordMem, RegXMM, 0 } },
 {"minps",     2, 0x0f5d,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegXMM|LLongMem, RegXMM, 0 } },
@@ -1166,7 +1166,7 @@ static const template i386_optab[] =
 {"movlps",    2, 0x0f13,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegXMM, LLongMem, 0 } },
 {"movmskps",  2, 0x0f50,    X, CpuSSE, lq_Suf|IgnoreSize|Modrm,	{ RegXMM|InvMem, Reg32|Reg64, 0 } },
 {"movntps",   2, 0x0f2b,    X, CpuSSE, NoSuf|IgnoreSize|Modrm, 	{ RegXMM, LLongMem, 0 } },
-{"movntq",    2, 0x0fe7,    X, CpuSSE, NoSuf|IgnoreSize|Modrm, 	{ RegMMX, LLongMem, 0 } },
+{"movntq",    2, 0x0fe7,    X, CpuMMX2,NoSuf|IgnoreSize|Modrm, 	{ RegMMX, LLongMem, 0 } },
 {"movntdq",   2, 0x660fe7,  X, CpuSSE2,NoSuf|IgnoreSize|Modrm, 	{ RegXMM, LLongMem, 0 } },
 {"movss",     2, 0xf30f10,  X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegXMM|WordMem, RegXMM, 0 } },
 {"movss",     2, 0xf30f11,  X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegXMM, RegXMM|WordMem, 0 } },
@@ -1175,38 +1175,38 @@ static const template i386_optab[] =
 {"mulps",     2, 0x0f59,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegXMM|LLongMem, RegXMM, 0 } },
 {"mulss",     2, 0xf30f59,  X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegXMM|WordMem, RegXMM, 0 } },
 {"orps",      2, 0x0f56,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegXMM|LLongMem, RegXMM, 0 } },
-{"pavgb",     2, 0x0fe0,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegMMX|LLongMem, RegMMX, 0 } },
+{"pavgb",     2, 0x0fe0,    X, CpuMMX2,NoSuf|IgnoreSize|Modrm,	{ RegMMX|LLongMem, RegMMX, 0 } },
 {"pavgb",     2, 0x660fe0,  X, CpuSSE2,NoSuf|IgnoreSize|Modrm,	{ RegXMM|LLongMem, RegXMM, 0 } },
-{"pavgw",     2, 0x0fe3,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegMMX|LLongMem, RegMMX, 0 } },
+{"pavgw",     2, 0x0fe3,    X, CpuMMX2,NoSuf|IgnoreSize|Modrm,	{ RegMMX|LLongMem, RegMMX, 0 } },
 {"pavgw",     2, 0x660fe3,  X, CpuSSE2,NoSuf|IgnoreSize|Modrm,	{ RegXMM|LLongMem, RegXMM, 0 } },
-{"pextrw",    3, 0x0fc5,    X, CpuSSE, lq_Suf|IgnoreSize|Modrm,	{ Imm8, RegMMX|InvMem, Reg32|Reg64 } },
+{"pextrw",    3, 0x0fc5,    X, CpuMMX2,lq_Suf|IgnoreSize|Modrm,	{ Imm8, RegMMX|InvMem, Reg32|Reg64 } },
 {"pextrw",    3, 0x660fc5,  X, CpuSSE2,lq_Suf|IgnoreSize|Modrm,	{ Imm8, RegXMM|InvMem, Reg32|Reg64 } },
-{"pinsrw",    3, 0x0fc4,    X, CpuSSE, lq_Suf|IgnoreSize|Modrm,	{ Imm8, Reg32|Reg64|ShortMem, RegMMX } },
+{"pinsrw",    3, 0x0fc4,    X, CpuMMX2,lq_Suf|IgnoreSize|Modrm,	{ Imm8, Reg32|Reg64|ShortMem, RegMMX } },
 {"pinsrw",    3, 0x660fc4,  X, CpuSSE2,lq_Suf|IgnoreSize|Modrm,	{ Imm8, Reg32|Reg64|ShortMem, RegXMM } },
-{"pmaxsw",    2, 0x0fee,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegMMX|LLongMem, RegMMX, 0 } },
+{"pmaxsw",    2, 0x0fee,    X, CpuMMX2,NoSuf|IgnoreSize|Modrm,	{ RegMMX|LLongMem, RegMMX, 0 } },
 {"pmaxsw",    2, 0x660fee,  X, CpuSSE2,NoSuf|IgnoreSize|Modrm,	{ RegXMM|LLongMem, RegXMM, 0 } },
-{"pmaxub",    2, 0x0fde,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegMMX|LLongMem, RegMMX, 0 } },
+{"pmaxub",    2, 0x0fde,    X, CpuMMX2,NoSuf|IgnoreSize|Modrm,	{ RegMMX|LLongMem, RegMMX, 0 } },
 {"pmaxub",    2, 0x660fde,  X, CpuSSE2,NoSuf|IgnoreSize|Modrm,	{ RegXMM|LLongMem, RegXMM, 0 } },
-{"pminsw",    2, 0x0fea,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegMMX|LLongMem, RegMMX, 0 } },
+{"pminsw",    2, 0x0fea,    X, CpuMMX2,NoSuf|IgnoreSize|Modrm,	{ RegMMX|LLongMem, RegMMX, 0 } },
 {"pminsw",    2, 0x660fea,  X, CpuSSE2,NoSuf|IgnoreSize|Modrm,	{ RegXMM|LLongMem, RegXMM, 0 } },
-{"pminub",    2, 0x0fda,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegMMX|LLongMem, RegMMX, 0 } },
+{"pminub",    2, 0x0fda,    X, CpuMMX2,NoSuf|IgnoreSize|Modrm,	{ RegMMX|LLongMem, RegMMX, 0 } },
 {"pminub",    2, 0x660fda,  X, CpuSSE2,NoSuf|IgnoreSize|Modrm,	{ RegXMM|LLongMem, RegXMM, 0 } },
-{"pmovmskb",  2, 0x0fd7,    X, CpuSSE, lq_Suf|IgnoreSize|Modrm,	{ RegMMX|InvMem, Reg32|Reg64, 0 } },
+{"pmovmskb",  2, 0x0fd7,    X, CpuMMX2,lq_Suf|IgnoreSize|Modrm,	{ RegMMX|InvMem, Reg32|Reg64, 0 } },
 {"pmovmskb",  2, 0x660fd7,  X, CpuSSE2,lq_Suf|IgnoreSize|Modrm,	{ RegXMM|InvMem, Reg32|Reg64, 0 } },
-{"pmulhuw",   2, 0x0fe4,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegMMX|LLongMem, RegMMX, 0 } },
+{"pmulhuw",   2, 0x0fe4,    X, CpuMMX2,NoSuf|IgnoreSize|Modrm,	{ RegMMX|LLongMem, RegMMX, 0 } },
 {"pmulhuw",   2, 0x660fe4,  X, CpuSSE2,NoSuf|IgnoreSize|Modrm,	{ RegXMM|LLongMem, RegXMM, 0 } },
-{"prefetchnta", 1, 0x0f18,  0, CpuSSE, NoSuf|IgnoreSize|Modrm, 	{ LLongMem, 0, 0 } },
-{"prefetcht0",  1, 0x0f18,  1, CpuSSE, NoSuf|IgnoreSize|Modrm, 	{ LLongMem, 0, 0 } },
-{"prefetcht1",  1, 0x0f18,  2, CpuSSE, NoSuf|IgnoreSize|Modrm, 	{ LLongMem, 0, 0 } },
-{"prefetcht2",  1, 0x0f18,  3, CpuSSE, NoSuf|IgnoreSize|Modrm, 	{ LLongMem, 0, 0 } },
-{"psadbw",    2, 0x0ff6,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegMMX|LLongMem, RegMMX, 0 } },
+{"prefetchnta", 1, 0x0f18,  0, CpuMMX2,NoSuf|IgnoreSize|Modrm, 	{ LLongMem, 0, 0 } },
+{"prefetcht0",  1, 0x0f18,  1, CpuMMX2,NoSuf|IgnoreSize|Modrm, 	{ LLongMem, 0, 0 } },
+{"prefetcht1",  1, 0x0f18,  2, CpuMMX2,NoSuf|IgnoreSize|Modrm, 	{ LLongMem, 0, 0 } },
+{"prefetcht2",  1, 0x0f18,  3, CpuMMX2,NoSuf|IgnoreSize|Modrm, 	{ LLongMem, 0, 0 } },
+{"psadbw",    2, 0x0ff6,    X, CpuMMX2,NoSuf|IgnoreSize|Modrm,	{ RegMMX|LLongMem, RegMMX, 0 } },
 {"psadbw",    2, 0x660ff6,  X, CpuSSE2,NoSuf|IgnoreSize|Modrm,	{ RegXMM|LLongMem, RegXMM, 0 } },
-{"pshufw",    3, 0x0f70,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ Imm8, RegMMX|LLongMem, RegMMX } },
+{"pshufw",    3, 0x0f70,    X, CpuMMX2,NoSuf|IgnoreSize|Modrm,	{ Imm8, RegMMX|LLongMem, RegMMX } },
 {"rcpps",     2, 0x0f53,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegXMM|LLongMem, RegXMM, 0 } },
 {"rcpss",     2, 0xf30f53,  X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegXMM|WordMem, RegXMM, 0 } },
 {"rsqrtps",   2, 0x0f52,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegXMM|LLongMem, RegXMM, 0 } },
 {"rsqrtss",   2, 0xf30f52,  X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegXMM|WordMem, RegXMM, 0 } },
-{"sfence",    0, 0x0fae, 0xf8, CpuSSE, NoSuf|IgnoreSize|ImmExt,	{ 0, 0, 0 } },
+{"sfence",    0, 0x0fae, 0xf8, CpuMMX2,NoSuf|IgnoreSize|ImmExt,	{ 0, 0, 0 } },
 {"shufps",    3, 0x0fc6,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ Imm8, RegXMM|LLongMem, RegXMM } },
 {"sqrtps",    2, 0x0f51,    X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegXMM|LLongMem, RegXMM, 0 } },
 {"sqrtss",    2, 0xf30f51,  X, CpuSSE, NoSuf|IgnoreSize|Modrm,	{ RegXMM|WordMem, RegXMM, 0 } },
@@ -1341,7 +1341,7 @@ static const template i386_optab[] =
 {"femms",    0, 0x0f0e,	   X, Cpu3dnow, NoSuf,			{ 0, 0, 0 } },
 {"pavgusb",  2, 0x0f0f, 0xbf, Cpu3dnow, NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
 {"pf2id",    2, 0x0f0f, 0x1d, Cpu3dnow, NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
-{"pf2iw",    2, 0x0f0f, 0x1c, Cpu3dnow|Cpu686,NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
+{"pf2iw",    2, 0x0f0f, 0x1c, Cpu3dnowA,NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
 {"pfacc",    2, 0x0f0f, 0xae, Cpu3dnow, NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
 {"pfadd",    2, 0x0f0f, 0x9e, Cpu3dnow, NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
 {"pfcmpeq",  2, 0x0f0f, 0xb0, Cpu3dnow, NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
@@ -1350,8 +1350,8 @@ static const template i386_optab[] =
 {"pfmax",    2, 0x0f0f, 0xa4, Cpu3dnow, NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
 {"pfmin",    2, 0x0f0f, 0x94, Cpu3dnow, NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
 {"pfmul",    2, 0x0f0f, 0xb4, Cpu3dnow, NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
-{"pfnacc",   2, 0x0f0f, 0x8a, Cpu3dnow|Cpu686,NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
-{"pfpnacc",  2, 0x0f0f, 0x8e, Cpu3dnow|Cpu686,NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
+{"pfnacc",   2, 0x0f0f, 0x8a, Cpu3dnowA,NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
+{"pfpnacc",  2, 0x0f0f, 0x8e, Cpu3dnowA,NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
 {"pfrcp",    2, 0x0f0f, 0x96, Cpu3dnow, NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
 {"pfrcpit1", 2, 0x0f0f, 0xa6, Cpu3dnow, NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
 {"pfrcpit2", 2, 0x0f0f, 0xb6, Cpu3dnow, NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
@@ -1360,9 +1360,9 @@ static const template i386_optab[] =
 {"pfsub",    2, 0x0f0f, 0x9a, Cpu3dnow, NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
 {"pfsubr",   2, 0x0f0f, 0xaa, Cpu3dnow, NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
 {"pi2fd",    2, 0x0f0f, 0x0d, Cpu3dnow, NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
-{"pi2fw",    2, 0x0f0f, 0x0c, Cpu3dnow|Cpu686,NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
+{"pi2fw",    2, 0x0f0f, 0x0c, Cpu3dnowA,NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
 {"pmulhrw",  2, 0x0f0f, 0xb7, Cpu3dnow, NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
-{"pswapd",   2, 0x0f0f, 0xbb, Cpu3dnow|Cpu686,NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
+{"pswapd",   2, 0x0f0f, 0xbb, Cpu3dnowA,NoSuf|IgnoreSize|Modrm|ImmExt,	{ RegMMX|LongMem, RegMMX, 0 } },
 
 /* AMD extensions. */
 {"syscall",  0, 0x0f05,    X, CpuK6,	NoSuf,			{ 0, 0, 0} },