diff -urN src-0.8.22/src/Makefile.in src-0.8.22-mmu/src/Makefile.in
--- src-0.8.22/src/Makefile.in	2001-11-19 13:34:18.000000000 +0100
+++ src-0.8.22-mmu/src/Makefile.in	2003-07-25 12:11:11.000000000 +0200
@@ -33,7 +33,7 @@
 
 INCLUDES=-I. -I@top_srcdir@/src/include/
 
-OBJS = main.o newcpu.o memory.o @CPUOBJS@ custom.o cia.o serial.o blitter.o \
+OBJS = main.o newcpu.o mmu.o memory.o @CPUOBJS@ custom.o cia.o serial.o blitter.o \
        autoconf.o ersatz.o filesys.o hardfile.o keybuf.o expansion.o zfile.o \
        fpp.o readcpu.o cpudefs.o gfxutil.o gfxlib.o blitfunc.o blittable.o \
        disk.o audio.o compiler.o uaelib.o drawing.o picasso96.o cpustbl.o \
@@ -104,7 +104,7 @@
 	$(MAKE) -C tools build68kc
 tools/cpuopti:
 	$(MAKE) -C tools cpuopti
-tools/gencpu: 
+tools/gencpu: gencpu.c
 	$(MAKE) -C tools gencpu
 
 custom.o: blit.h
@@ -113,14 +113,14 @@
 cpudefs.c: tools/build68k @top_srcdir@/src/table68k
 	./tools/build68k <@top_srcdir@/src/table68k >cpudefs.c
 
-cpuemu.c: tools/gencpu
+cpuemu.c: tools/gencpu @top_srcdir@/src/table68k
 	./tools/gencpu
 
 # gencpu also creates cpustbl.c and cputbl.h
-cpustbl.c: cpuemu.c
-cputbl.h: cpuemu.c
+cpustbl.c: cpuemu.c @top_srcdir@/src/table68k
+cputbl.h: cpuemu.c @top_srcdir@/src/table68k
 
-cpufast.s: cpuemu.c tools/cpuopti
+cpufast.s: cpuemu.c tools/cpuopti @top_srcdir@/src/table68k
 	$(CC) $(INCLUDES) -S $(INCDIRS) $(CFLAGS) $(X_CFLAGS) $(DEBUGFLAGS) $(NO_SCHED_CFLAGS) $< -o cputmp.s
 	./tools/cpuopti <cputmp.s >$@
 	rm cputmp.s
diff -urN src-0.8.22/src/custom.c src-0.8.22-mmu/src/custom.c
--- src-0.8.22/src/custom.c	2002-04-07 15:47:50.000000000 +0200
+++ src-0.8.22-mmu/src/custom.c	2003-07-25 12:13:07.000000000 +0200
@@ -4052,6 +4052,7 @@
 
     n_frames = 0;
 
+    mmu_set_tc(0);
     expamem_reset ();
 
     DISK_reset ();
@@ -4514,8 +4515,10 @@
     uae_u16 rval = (value << 8) | (value & 0xFF);
     special_mem |= S_WRITE;
     custom_wput (addr, rval);
-    if (!warned)
-	write_log ("Byte put to custom register.\n"), warned++;
+    if (!warned || ((addr & 0xff0000) == 0xda0000)) {	/* warn once; always log writes to 0xDAxxxx */
+        write_log ("Byte put to custom register (addr=%lx val=%lx)\n", (unsigned long)addr, (unsigned long)value);	/* %lx needs unsigned long */
+	warned++;
+    }
 }
 
 void REGPARAM2 custom_lput(uaecptr addr, uae_u32 value)
diff -urN src-0.8.22/src/custom.c~ src-0.8.22-mmu/src/custom.c~
--- src-0.8.22/src/custom.c~	1970-01-01 01:00:00.000000000 +0100
+++ src-0.8.22-mmu/src/custom.c~	2003-07-25 12:11:11.000000000 +0200
@@ -0,0 +1,4878 @@
+ /*
+  * UAE - The Un*x Amiga Emulator
+  *
+  * Custom chip emulation
+  *
+  * Copyright 1995-2002 Bernd Schmidt
+  * Copyright 1995 Alessandro Bissacco
+  * Copyright 2000-2002 Toni Wilen
+  */
+
+#include "sysconfig.h"
+#include "sysdeps.h"
+
+#include <ctype.h>
+#include <assert.h>
+
+#include "config.h"
+#include "options.h"
+#include "threaddep/thread.h"
+#include "uae.h"
+#include "gensound.h"
+#include "sounddep/sound.h"
+#include "events.h"
+#include "memory.h"
+#include "custom.h"
+#include "newcpu.h"
+#include "cia.h"
+#include "disk.h"
+#include "blitter.h"
+#include "xwin.h"
+#include "joystick.h"
+#include "audio.h"
+#include "keybuf.h"
+#include "serial.h"
+#include "osemu.h"
+#include "autoconf.h"
+#include "gui.h"
+#include "picasso96.h"
+#include "drawing.h"
+#include "savestate.h"
+
+#define SPRITE_COLLISIONS
+
+static uae_u16 last_custom_value;
+
+static unsigned int n_consecutive_skipped = 0;
+static unsigned int total_skipped = 0;
+
+/* Mouse and joystick emulation */
+
+int buttonstate[3];
+static int mouse_x, mouse_y;
+int joy0button, joy1button;
+unsigned int joy0dir, joy1dir;
+
+/* Events */
+
+unsigned long int currcycle, nextevent, is_lastline;
+static int rpt_did_reset;
+struct ev eventtab[ev_max];
+
+frame_time_t vsynctime, vsyncmintime;
+
+static int vpos;
+static uae_u16 lof;
+static int next_lineno;
+static enum nln_how nextline_how;
+static int lof_changed = 0;
+
+static uae_u32 sprtaba[256],sprtabb[256];
+static uae_u32 sprite_ab_merge[256];
+/* Tables for collision detection.  */
+static uae_u32 sprclx[16], clxmask[16];
+
+/*
+ * Hardware registers of all sorts.
+ */
+
+static void custom_wput_1 (int, uaecptr, uae_u32) REGPARAM;
+
+static uae_u16 cregs[256];
+
+uae_u16 intena,intreq;
+uae_u16 dmacon;
+uae_u16 adkcon; /* used by audio code */
+
+static uae_u32 cop1lc,cop2lc,copcon;
+ 
+int maxhpos = MAXHPOS_PAL;
+int maxvpos = MAXVPOS_PAL;
+int minfirstline = MINFIRSTLINE_PAL;
+int vblank_endline = VBLANK_ENDLINE_PAL;
+int vblank_hz = VBLANK_HZ_PAL;
+unsigned long syncbase;
+static int fmode;
+static unsigned int beamcon0, new_beamcon0;
+
+#define MAX_SPRITES 8
+
+/* This is but an educated guess. It seems to be correct, but this stuff
+ * isn't documented well. */
+enum sprstate { SPR_restart, SPR_waiting_start, SPR_waiting_stop };
+
+struct sprite {
+    uaecptr pt;
+    int xpos;
+    int vstart;
+    int vstop;
+    int armed;
+    enum sprstate state;
+};
+
+static struct sprite spr[8];
+
+static int sprite_vblank_endline = 25;
+
+static unsigned int sprctl[MAX_SPRITES], sprpos[MAX_SPRITES];
+static uae_u16 sprdata[MAX_SPRITES][4], sprdatb[MAX_SPRITES][4];
+static int sprite_last_drawn_at[MAX_SPRITES];
+static int last_sprite_point, nr_armed;
+static int sprite_width, sprres, sprite_buffer_res;
+
+static uae_u32 bpl1dat, bpl2dat, bpl3dat, bpl4dat, bpl5dat, bpl6dat, bpl7dat, bpl8dat;
+static uae_s16 bpl1mod, bpl2mod;
+
+static uaecptr bplpt[8];
+uae_u8 *real_bplpt[8];
+/* Used as a debugging aid, to offset any bitplane temporarily.  */
+int bpl_off[8];
+
+/*static int blitcount[256];  blitter debug */
+
+static struct color_entry current_colors;
+static unsigned int bplcon0, bplcon1, bplcon2, bplcon3, bplcon4;
+static unsigned int diwstrt, diwstop, diwhigh;
+static int diwhigh_written;
+static unsigned int ddfstrt, ddfstop;
+
+/* The display and data fetch windows */
+
+enum diw_states
+{
+    DIW_waiting_start, DIW_waiting_stop
+};
+
+static int plffirstline, plflastline;
+static int plfstrt, plfstop;
+static int last_diw_pix_hpos, last_ddf_pix_hpos, last_decide_line_hpos;
+static int last_fetch_hpos, last_sprite_hpos;
+int diwfirstword, diwlastword;
+static enum diw_states diwstate, hdiwstate;
+
+/* Sprite collisions */
+static unsigned int clxdat, clxcon, clxcon2, clxcon_bpl_enable, clxcon_bpl_match;
+static int clx_sprmask;
+
+enum copper_states {
+    COP_stop,
+    COP_read1_in2,
+    COP_read1_wr_in4,
+    COP_read1_wr_in2,
+    COP_read1,
+    COP_read2_wr_in2,
+    COP_read2,
+    COP_bltwait,
+    COP_wait_in4,
+    COP_wait_in2,
+    COP_skip_in4,
+    COP_skip_in2,
+    COP_wait1,
+    COP_wait
+};
+
+struct copper {
+    /* The current instruction words.  */
+    unsigned int i1, i2;
+    unsigned int saved_i1, saved_i2;
+    enum copper_states state;
+    /* Instruction pointer.  */
+    uaecptr ip, saved_ip;
+    int hpos, vpos;
+    unsigned int ignore_next;
+    int vcmp, hcmp;
+
+    /* When we schedule a copper event, knowing a few things about the future
+       of the copper list can reduce the number of sync_with_cpu calls
+       dramatically.  */
+    unsigned int first_sync;
+    unsigned int regtypes_modified;
+};
+
+#define REGTYPE_NONE 0
+#define REGTYPE_COLOR 1
+#define REGTYPE_SPRITE 2
+#define REGTYPE_PLANE 4
+#define REGTYPE_BLITTER 8
+#define REGTYPE_JOYPORT 16
+#define REGTYPE_DISK 32
+#define REGTYPE_POS 64
+#define REGTYPE_AUDIO 128
+
+#define REGTYPE_ALL 255
+/* Always set in regtypes_modified, to enable a forced update when things like
+   DMACON, BPLCON0, COPJMPx get written.  */
+#define REGTYPE_FORCE 256
+
+
+static unsigned int regtypes[512];
+
+static struct copper cop_state;
+static int copper_enabled_thisline;
+static int cop_min_waittime;
+
+/*
+ * Statistics
+ */
+
+/* Used also by bebox.cpp */
+unsigned long int msecs = 0, frametime = 0, lastframetime = 0, timeframes = 0;
+static unsigned long int seconds_base;
+int bogusframe;
+int n_frames;
+
+#define DEBUG_COPPER 0
+#if DEBUG_COPPER
+/* 10000 isn't enough!  */
+#define NR_COPPER_RECORDS 40000
+#else
+#define NR_COPPER_RECORDS 1
+#endif
+
+/* Record copper activity for the debugger.  */
+struct cop_record
+{
+  int hpos, vpos;
+  uaecptr addr;
+};
+static struct cop_record cop_record[2][NR_COPPER_RECORDS];
+static int nr_cop_records[2];
+static int curr_cop_set;
+
+/* Recording of custom chip register changes.  */
+static int current_change_set;
+
+#ifdef OS_WITHOUT_MEMORY_MANAGEMENT
+/* sam: Those arrays use around 7Mb of BSS... That seems   */
+/* too much for AmigaDOS (uae crashes as soon as one loads */
+/* it). So I use a different strategy here (realloc the    */
+/* arrays when needed). That strategy might be useful for  */
+/* computers with low memory.                              */
+struct sprite_entry  *sprite_entries[2];
+struct color_change *color_changes[2];
+static int max_sprite_entry = 400;
+static int delta_sprite_entry = 0;
+static int max_color_change = 400;
+static int delta_color_change = 0;
+#else
+struct sprite_entry sprite_entries[2][MAX_SPR_PIXELS / 16];
+struct color_change color_changes[2][MAX_REG_CHANGE];
+#endif
+
+struct decision line_decisions[2 * (MAXVPOS + 1) + 1];
+struct draw_info line_drawinfo[2][2 * (MAXVPOS + 1) + 1];
+struct color_entry color_tables[2][(MAXVPOS + 1) * 2];
+
+static int next_sprite_entry = 0;
+static int prev_next_sprite_entry;
+static int next_sprite_forced = 1;
+
+struct sprite_entry *curr_sprite_entries, *prev_sprite_entries;
+struct color_change *curr_color_changes, *prev_color_changes;
+struct draw_info *curr_drawinfo, *prev_drawinfo;
+struct color_entry *curr_color_tables, *prev_color_tables;
+
+static int next_color_change;
+static int next_color_entry, remembered_color_entry;
+static int color_src_match, color_dest_match, color_compare_result;
+
+static uae_u32 thisline_changed;
+
+#ifdef SMART_UPDATE
+#define MARK_LINE_CHANGED do { thisline_changed = 1; } while (0)
+#else
+#define MARK_LINE_CHANGED do { ; } while (0)
+#endif
+
+static struct decision thisline_decision;
+static int passed_plfstop, fetch_cycle;
+
+enum fetchstate {
+    fetch_not_started,
+    fetch_started,
+    fetch_was_plane0
+} fetch_state;
+
+/*
+ * helper functions
+ */
+
+uae_u32 get_copper_address (int copno)
+{
+    /* Only copper lists 1 and 2 have a location register; any other
+       number yields 0.  */
+    if (copno == 1)
+	return cop1lc;
+    return copno == 2 ? cop2lc : 0;
+}
+
+STATIC_INLINE void record_copper (uaecptr addr, int hpos, int vpos)
+{
+#if DEBUG_COPPER
+    /* Append to the current record set; silently drop once it is full.  */
+    int slot = nr_cop_records[curr_cop_set];
+    if (slot < NR_COPPER_RECORDS) {
+	struct cop_record *rec = &cop_record[curr_cop_set][slot];
+	rec->addr = addr, rec->hpos = hpos, rec->vpos = vpos;
+	nr_cop_records[curr_cop_set] = slot + 1;
+    }
+#endif
+}
+
+int find_copper_record (uaecptr addr, int *phpos, int *pvpos)
+{
+    /* Search the set that record_copper () is not currently filling.  */
+    struct cop_record *rec = cop_record[curr_cop_set ^ 1];
+    struct cop_record *end = rec + nr_cop_records[curr_cop_set ^ 1];
+    for (; rec < end; rec++) {
+	if (rec->addr != addr)
+	    continue;
+	*phpos = rec->hpos;
+	*pvpos = rec->vpos;
+	return 1;
+    }
+    return 0;
+}
+
+int rpt_available = 0;
+
+void reset_frame_rate_hack (void)
+{
+    /* Only does anything while the CPU speed setting is -1.  */
+    if (currprefs.m68k_speed == -1) {
+	if (rpt_available) {
+	    rpt_did_reset = 1;
+	    is_lastline = 0;
+	    vsyncmintime = read_processor_time() + vsynctime;
+	    write_log ("Resetting frame rate hack\n");
+	} else {
+	    /* rpt_available is clear: switch the speed setting to 0.  */
+	    currprefs.m68k_speed = 0;
+	}
+    }
+}
+
+void check_prefs_changed_custom (void)
+{   /* Copy the settings handled here from changed_prefs into currprefs.  */
+    int ports_differ = (currprefs.jport0 != changed_prefs.jport0
+			|| currprefs.jport1 != changed_prefs.jport1);
+    currprefs.gfx_framerate = changed_prefs.gfx_framerate;
+    if (ports_differ) {	/* Not really the right place for this... */
+	currprefs.jport0 = changed_prefs.jport0;
+	currprefs.jport1 = changed_prefs.jport1;
+	joystick_setting_changed ();
+    }
+    currprefs.immediate_blits = changed_prefs.immediate_blits;
+    currprefs.blits_32bit_enabled = changed_prefs.blits_32bit_enabled;
+    currprefs.collision_level = changed_prefs.collision_level;
+    currprefs.fast_copper = changed_prefs.fast_copper;
+}
+
+STATIC_INLINE void setclr (uae_u16 *p, uae_u16 val)
+{
+    /* Bit 15 of VAL chooses the operation: when set, the low 15 bits of
+       VAL are OR-ed into *P; when clear, the bits of VAL are cleared.  */
+    *p = (val & 0x8000) ? (*p | (val & 0x7FFF))
+			 : (*p & ~val);
+}
+
+__inline__ int current_hpos (void)
+{   /* Cycles elapsed since eventtab[ev_hsync].oldcycles, in CYCLE_UNIT units.  */
+    return (get_cycles () - eventtab[ev_hsync].oldcycles) / CYCLE_UNIT;
+}
+
+STATIC_INLINE uae_u8 *pfield_xlateptr (uaecptr plpt, int bytecount)
+{
+    static int warned = 0;
+    /* Translate PLPT via the chip memory bank if the range checks out.  */
+    if (chipmem_bank.check (plpt, bytecount))
+	return chipmem_bank.xlateaddr (plpt);
+    if (!warned)	/* complain only once */
+	warned++, write_log ("Warning: Bad playfield pointer\n");
+    return NULL;
+}
+
+STATIC_INLINE void docols (struct color_entry *colentry)
+{
+    /* AGA refreshes 256 registers holding values up to 0xFFFFFF through
+       CONVERT_RGB; otherwise 32 registers holding values up to 0xFFF are
+       looked up in xcolors[].  Out-of-range values are left alone.  */
+    int aga = (currprefs.chipset_mask & CSMASK_AGA) != 0;
+    int nregs = aga ? 256 : 32;
+    int limit = aga ? 16777215 : 4095;
+    int reg;
+
+    for (reg = 0; reg < nregs; reg++) {
+	int v = color_reg_get (colentry, reg);
+	if (v < 0 || v > limit)
+	    continue;
+	if (aga)
+	    colentry->acolors[reg] = CONVERT_RGB (v);
+	else
+	    colentry->acolors[reg] = xcolors[v];
+    }
+}
+
+void notice_new_xcolors (void)
+{
+    int entry;
+
+    /* Recompute current_colors, then every entry of both color_tables sets.  */
+    docols(&current_colors);
+    for (entry = 0; entry < (MAXVPOS + 1)*2; entry++) {
+	docols(&color_tables[0][entry]);
+	docols(&color_tables[1][entry]);
+    }
+}
+
+static void do_sprites (int currhp);
+
+static void remember_ctable (void)
+{   /* Store this line's color table index and flag the line if the colors differ from the stored ones.  */
+    if (remembered_color_entry == -1) {
+	/* The colors changed since we last recorded a color map. Record a
+	 * new one. */
+	color_reg_cpy (curr_color_tables + next_color_entry, &current_colors);
+	remembered_color_entry = next_color_entry++;
+    }
+    thisline_decision.ctable = remembered_color_entry;
+    if (color_src_match == -1 || color_dest_match != remembered_color_entry
+	|| line_decisions[next_lineno].ctable != color_src_match)
+    {
+	/* The remembered comparison didn't help us - need to compare again. */
+	int oldctable = line_decisions[next_lineno].ctable;
+	int changed = 0;
+
+	if (oldctable == -1) {	/* no table stored for this line: treat as changed */
+	    changed = 1;
+	    color_src_match = color_dest_match = -1;
+	} else {
+	    color_compare_result = color_reg_cmp (&prev_color_tables[oldctable], &current_colors) != 0;
+	    if (color_compare_result)
+		changed = 1;
+	    color_src_match = oldctable;	/* cache which pair was compared */
+	    color_dest_match = remembered_color_entry;
+	}
+	thisline_changed |= changed;
+    } else {
+	/* We know the result of the comparison */
+	if (color_compare_result)
+	    thisline_changed = 1;
+    }
+}
+
+static void remember_ctable_for_border (void)
+{   /* Currently does exactly what remember_ctable () does.  */
+    remember_ctable ();
+}
+
+/* Called to determine the state of the horizontal display window state
+ * machine at horizontal position HPOS. It might have changed since we last
+ * checked; crossings of diwfirstword/diwlastword go into thisline_decision.  */
+static void decide_diw (int hpos)
+{
+    int pix_hpos = coord_diw_to_window_x (hpos * 2);
+    if (hdiwstate == DIW_waiting_start && thisline_decision.diwfirstword == -1
+	&& pix_hpos >= diwfirstword && last_diw_pix_hpos < diwfirstword)
+    {
+	thisline_decision.diwfirstword = diwfirstword < 0 ? 0 : diwfirstword;	/* clamped at 0 */
+	hdiwstate = DIW_waiting_stop;
+	thisline_decision.diwlastword = -1;
+    }
+    if (hdiwstate == DIW_waiting_stop && thisline_decision.diwlastword == -1
+	&& pix_hpos >= diwlastword && last_diw_pix_hpos < diwlastword)
+    {
+	thisline_decision.diwlastword = diwlastword < 0 ? 0 : diwlastword;	/* clamped at 0 */
+	hdiwstate = DIW_waiting_start;
+    }
+    last_diw_pix_hpos = pix_hpos;	/* lets the next call detect a crossing */
+}
+
+/* The HRM says 0xD8, but that can't work... */
+#define HARD_DDF_STOP (0xD4)
+
+static void finish_playfield_line (void)
+{   /* End-of-line bookkeeping: drawn-line range, bitplane modulos, change detection.  */
+    int m1, m2;
+
+    /* The latter condition might be able to happen in interlaced frames. */
+    if (vpos >= minfirstline && (thisframe_first_drawn_line == -1 || vpos < thisframe_first_drawn_line))
+	thisframe_first_drawn_line = vpos;
+    thisframe_last_drawn_line = vpos;
+
+    if ((currprefs.chipset_mask & CSMASK_AGA) && (fmode & 0x4000)) {	/* AGA with fmode bit 14: one modulo for all planes */
+	if (((diwstrt >> 8) ^ vpos) & 1)	/* picked by the parity of (diwstrt >> 8) ^ vpos */
+	    m1 = m2 = bpl2mod;
+	else
+	    m1 = m2 = bpl1mod;
+    } else {
+	m1 = bpl1mod;
+	m2 = bpl2mod;
+    }
+
+    if (dmaen (DMA_BITPLANE))
+	switch (GET_PLANES (bplcon0)) {	/* cases fall through: planes N down to 1 all get a modulo */
+	case 8: bplpt[7] += m2;
+	case 7: bplpt[6] += m1;
+	case 6: bplpt[5] += m2;
+	case 5: bplpt[4] += m1;
+	case 4: bplpt[3] += m2;
+	case 3: bplpt[2] += m1;
+	case 2: bplpt[1] += m2;
+	case 1: bplpt[0] += m1;
+	}
+
+    /* These are for comparison. */
+    thisline_decision.bplcon0 = bplcon0;
+    thisline_decision.bplcon2 = bplcon2;
+    thisline_decision.bplcon3 = bplcon3;
+    thisline_decision.bplcon4 = bplcon4;
+
+#ifdef SMART_UPDATE
+    if (line_decisions[next_lineno].plflinelen != thisline_decision.plflinelen
+	|| line_decisions[next_lineno].plfleft != thisline_decision.plfleft
+	|| line_decisions[next_lineno].bplcon0 != thisline_decision.bplcon0
+	|| line_decisions[next_lineno].bplcon2 != thisline_decision.bplcon2
+	|| line_decisions[next_lineno].bplcon3 != thisline_decision.bplcon3
+	|| line_decisions[next_lineno].bplcon4 != thisline_decision.bplcon4
+	)
+#endif /* SMART_UPDATE */
+	thisline_changed = 1;	/* unconditional when SMART_UPDATE is not defined */
+}
+
+static int fetchmode;
+
+/* The fetch unit mainly controls ddf stop.  It's the number of cycles that
+   are contained in an indivisible block during which ddf is active.  E.g.
+   if DDF starts at 0x30, and fetchunit is 8, then possible DDF stops are
+   0x30 + n * 8.  */
+static int fetchunit, fetchunit_mask;
+/* The delay before fetching the same bitplane again.  Can be larger than
+   the number of bitplanes; in that case there are additional empty cycles
+   with no data fetch (this happens for high fetchmodes and low
+   resolutions).  */
+static int fetchstart, fetchstart_shift, fetchstart_mask;
+/* fm_maxplane holds the maximum number of planes possible with the current
+   fetch mode.  This selects the cycle diagram:
+   8 planes: 73516240
+   4 planes: 3120
+   2 planes: 10.  */
+static int fm_maxplane, fm_maxplane_shift;
+
+/* The corresponding values, by fetchmode and display resolution.  */
+static int fetchunits[] = { 8,8,8,0, 16,8,8,0, 32,16,8,0 };
+static int fetchstarts[] = { 3,2,1,0, 4,3,2,0, 5,4,3,0 };
+static int fm_maxplanes[] = { 3,2,1,0, 3,3,2,0, 3,3,3,0 }; 
+
+static int cycle_diagram_table[3][3][9][32];
+static int *curr_diagram;
+static int cycle_sequences[3*8] = { 2,1,2,1,2,1,2,1, 4,2,3,1,4,2,3,1, 8,4,6,2,7,3,5,1 };
+
+static void debug_cycle_diagram(void)
+{
+    int fm, res, planes, cycle, v;
+    char aa;
+
+    /* Dump cycle_diagram_table: '-' for 0, '1' for positive, 'X' for negative.  */
+    for (fm = 0; fm < 3; fm++) {
+	write_log ("FMODE %d\n=======\n", fm);
+	for (res = 0; res <= 2; res++) {
+	    for (planes = 0; planes <= 8; planes++) {
+		write_log("%d: ",planes);
+		for (cycle = 0; cycle < 32; cycle++) {
+		    v=cycle_diagram_table[fm][res][planes][cycle];
+		    if (v==0) aa='-'; else if(v>0) aa='1'; else aa='X';
+		    write_log("%c",aa);
+		}
+		write_log("\n");
+	    }
+	    write_log("\n");
+	}
+    }
+}
+
+static void create_cycle_diagram_table(void)
+{   /* Fill cycle_diagram_table for every fetch mode / resolution / plane count.  */
+    int fm, res, cycle, planes, v;
+    int fetch_start, max_planes;
+    int *cycle_sequence;
+
+    for (fm = 0; fm <= 2; fm++) {
+	for (res = 0; res <= 2; res++) {
+	    max_planes = fm_maxplanes[fm * 4 + res];	/* still a shift count here */
+	    fetch_start = 1 << fetchstarts[fm * 4 + res];
+	    cycle_sequence = &cycle_sequences[(max_planes - 1) * 8];	/* row of 8 for this shift count */
+	    max_planes = 1 << max_planes;	/* now the actual plane limit */
+	    for (planes = 0; planes <= 8; planes++) {
+		for (cycle = 0; cycle < 32; cycle++)
+		    cycle_diagram_table[fm][res][planes][cycle] = -1;	/* default; cycles below fetch_start are overwritten */
+		if (planes <= max_planes) {
+		    for (cycle = 0; cycle < fetch_start; cycle++) {
+			if (cycle < max_planes && planes >= cycle_sequence[cycle & 7]) {
+			    v = 1;
+			} else {
+			    v = 0;
+			}
+			cycle_diagram_table[fm][res][planes][cycle] = v;
+		    }
+		}
+	    }
+	}
+    }
+#if 0
+    debug_cycle_diagram ();
+#endif
+}
+
+
+/* Used by the copper.  */
+static int estimated_last_fetch_cycle;
+static int cycle_diagram_shift;
+
+static void estimate_last_fetch_cycle (int hpos)
+{
+    int fetchunit = fetchunits[fetchmode * 4 + GET_RES (bplcon0)];	/* local; shadows the file-level fetchunit */
+
+    if (! passed_plfstop) {
+	int stop = plfstop < hpos || plfstop > HARD_DDF_STOP ? HARD_DDF_STOP : plfstop;	/* fall back to the hard stop */
+	/* We know that fetching is up-to-date up until hpos, so we can use fetch_cycle.  */
+	int fetch_cycle_at_stop = fetch_cycle + (stop - hpos);
+	int starting_last_block_at = (fetch_cycle_at_stop + fetchunit - 1) & ~(fetchunit - 1);	/* round up to a fetchunit multiple */
+
+	estimated_last_fetch_cycle = hpos + (starting_last_block_at - fetch_cycle) + fetchunit;
+    } else {
+	int starting_last_block_at = (fetch_cycle + fetchunit - 1) & ~(fetchunit - 1);	/* round up to a fetchunit multiple */
+	if (passed_plfstop == 2)
+	    starting_last_block_at -= fetchunit;
+
+	estimated_last_fetch_cycle = hpos + (starting_last_block_at - fetch_cycle) + fetchunit;
+    }
+}
+
+static uae_u32 outword[MAX_PLANES];
+static int out_nbits, out_offs;
+static uae_u32 todisplay[MAX_PLANES][4];
+static uae_u32 fetched[MAX_PLANES];
+static uae_u32 fetched_aga0[MAX_PLANES];
+static uae_u32 fetched_aga1[MAX_PLANES];
+
+/* Expansions from bplcon0/bplcon1.  */
+static int toscr_res, toscr_delay1, toscr_delay2, toscr_nr_planes, fetchwidth;
+
+/* The number of bits left from the last fetched words.  
+   This is an optimization - conceptually, we have to make sure the result is
+   the same as if toscr is called in each clock cycle.  However, to speed this
+   up, we accumulate display data; this variable keeps track of how much. 
+   Thus, once we do call toscr_nbits (which happens at least every 16 bits),
+   we can do more work at once.  */
+static int toscr_nbits;
+
+static int delayoffset;
+
+STATIC_INLINE void compute_delay_offset (int hpos)
+{
+    /* this fixes most horizontal scrolling jerkiness but can't be correct */
+    int raw = (((hpos - fm_maxplane - 0x18) & fetchstart_mask) << 1) & ~7;
+    int bit;
+
+    /* Keep only the lowest set bit among 8, 16 and 32; 0 if none is set.  */
+    delayoffset = 0;
+    for (bit = 8; bit <= 32; bit <<= 1)
+	if (raw & bit) {
+	    delayoffset = bit;
+	    break;
+	}
+}
+
+static void expand_fmodes (void)
+{
+    /* All three tables are indexed by fetch mode * 4 + resolution.  */
+    int idx = fetchmode * 4 + GET_RES(bplcon0);
+    fetchunit = fetchunits[idx];
+    fetchunit_mask = fetchunit - 1;
+    fetchstart_shift = fetchstarts[idx];
+    fetchstart = 1 << fetchstart_shift;
+    fetchstart_mask = fetchstart - 1;
+    fm_maxplane_shift = fm_maxplanes[idx];
+    fm_maxplane = 1 << fm_maxplane_shift;
+}
+
+static int maxplanes_ocs[]={ 6,4,0,0 };
+static int maxplanes_ecs[]={ 6,4,2,0 };
+static int maxplanes_aga[]={ 8,4,2,0, 8,8,4,0, 8,8,8,0 };
+
+/* Expand bplcon0/bplcon1 into the toscr_xxx variables.  */
+static void compute_toscr_delay_1 (void)
+{
+    /* The delay2 fields sit four bits above the delay1 fields in bplcon1,
+       so shifting the register right by 4 lets both use the same masks.
+       The final mask is (16 << fetchmode) - 1 scaled down by toscr_res.  */
+    unsigned int hi = bplcon1 >> 4;
+    int mask = ((16 << fetchmode) - 1) >> toscr_res;
+    int d1 = (int)((bplcon1 & 0x0f) | ((bplcon1 & 0x0c00) >> 6)) + delayoffset;
+    int d2 = (int)((hi & 0x0f) | ((hi & 0x0c00) >> 6)) + delayoffset;
+
+    toscr_delay1 = (d1 & mask) << toscr_res;
+    toscr_delay2 = (d2 & mask) << toscr_res;
+}
+
+static void compute_toscr_delay (int hpos)
+{
+    int con0 = bplcon0;
+    int *limits;
+
+    /* Pick the plane-count limits for the configured chipset.  */
+    if (currprefs.chipset_mask & CSMASK_AGA)
+	limits = maxplanes_aga;
+    else if (currprefs.chipset_mask & CSMASK_ECS_DENISE)
+	limits = maxplanes_ecs;
+    else
+	limits = maxplanes_ocs;
+    /* Disable bitplane DMA if planes > maxplanes.  This is needed e.g. by the
+       Sanity WOC demo (at the "Party Effect").  */
+    if (GET_PLANES(con0) > limits[fetchmode*4 + GET_RES (con0)])
+	con0 &= ~0x7010;
+
+    toscr_res = GET_RES (con0);
+    toscr_nr_planes = GET_PLANES (con0);
+    compute_toscr_delay_1 ();
+}
+
+STATIC_INLINE void maybe_first_bpl1dat (int hpos)
+{
+    if (thisline_decision.plfleft != -1)	/* plfleft already set for this line */
+	return;
+    thisline_decision.plfleft = hpos;
+    compute_delay_offset (hpos);
+    compute_toscr_delay_1 ();
+}
+
+STATIC_INLINE void fetch (int nr, int fm)
+{
+    uaecptr src;
+    /* Planes at or beyond toscr_nr_planes are never fetched.  */
+    if (nr >= toscr_nr_planes)
+	return;
+    src = bplpt[nr] + bpl_off[nr];
+    if (fm == 0) {
+	/* chipmem_wget; the plane pointer advances by 2.  */
+	fetched[nr] = chipmem_wget (src);
+	bplpt[nr] += 2;
+    } else if (fm == 1) {
+	/* chipmem_lget; the plane pointer advances by 4.  */
+	fetched_aga0[nr] = chipmem_lget (src);
+	bplpt[nr] += 4;
+    } else if (fm == 2) {
+	/* Two chipmem_lget reads; the first one lands in fetched_aga1.  */
+	fetched_aga1[nr] = chipmem_lget (src);
+	fetched_aga0[nr] = chipmem_lget (src + 4);
+	bplpt[nr] += 8;
+    }
+    if (nr == 0)
+	fetch_state = fetch_was_plane0;
+}
+
+static void clear_fetchbuffer (uae_u32 *ptr, int nwords)
+{
+    int idx = 0;
+
+    /* Zeroing a buffer that held non-zero data changes the line, so flag
+       it; the scan is skipped once the line is already marked changed.  */
+    while (! thisline_changed && idx < nwords)
+	if (ptr[idx++])
+	    thisline_changed = 1;
+
+    /* NWORDS counts 32-bit words, hence the factor of 4.  */
+    memset (ptr, 0, nwords * 4);
+}
+
+static void update_toscr_planes (void)
+{
+    int plane;
+
+    /* Nothing to do unless toscr_nr_planes exceeds the recorded count.  */
+    if (toscr_nr_planes <= thisline_decision.nr_planes)
+	return;
+    /* Clear the first out_offs words of each newly counted plane.  */
+    for (plane = thisline_decision.nr_planes; plane < toscr_nr_planes; plane++)
+	clear_fetchbuffer ((uae_u32 *)(line_data[next_lineno] + 2 * MAX_WORDS_PER_LINE * plane), out_offs);
+
+    thisline_decision.nr_planes = toscr_nr_planes;
+}
+
+STATIC_INLINE void toscr_3_ecs (int nbits)
+{
+    /* Move the next NBITS bits of every active plane from todisplay[] into
+       outword[].  Even-numbered planes are delayed by toscr_delay1,
+       odd-numbered ones by toscr_delay2.  */
+    uae_u32 keep = 0xFFFF >> (16 - nbits);
+    int plane;
+
+    for (plane = 0; plane < toscr_nr_planes; plane++) {
+	/* Extract before the source word is shifted below.  */
+	int delay = (plane & 1) ? toscr_delay2 : toscr_delay1;
+	uae_u32 bits = todisplay[plane][0] >> (16 - nbits + delay);
+
+	outword[plane] <<= nbits;
+	outword[plane] |= bits & keep;
+	todisplay[plane][0] <<= nbits;
+    }
+}
+
+STATIC_INLINE void shift32plus (uae_u32 *p, int n)
+{
+    /* p[1] becomes the upper half of the 64-bit value p[1]:p[0] shifted
+       left by N; p[0] itself is left for the caller to shift.  N must be
+       between 1 and 31 for the shifts below to be well defined.  */
+    p[1] = (p[1] << n) | (p[0] >> (32 - n));
+}
+
+STATIC_INLINE void aga_shift (uae_u32 *p, int n, int fm)
+{
+    /* Shift the multi-word buffer left by N bits, working from the top
+       word down so each word still sees its unshifted lower neighbour.  */
+    int word = (fm == 2) ? 2 : 0;
+    for (; word >= 0; word--)
+	shift32plus (p + word, n);
+    p[0] <<= n;
+}
+
+STATIC_INLINE void toscr_3_aga (int nbits, int fm)
+{   /* Multi-word counterpart of toscr_3_ecs: moves NBITS bits per plane into outword[].  */
+    int delay1 = toscr_delay1;
+    int delay2 = toscr_delay2;
+    int i;
+    uae_u32 mask = 0xFFFF >> (16 - nbits);
+
+    {	/* planes 0, 2, 4, ... use delay1 */
+	int offs = (16 << fm) - nbits + delay1;
+	int off1 = offs >> 5;	/* index of the 32-bit word the extraction starts in */
+	if (off1 == 3)	/* keeps off1 + 1 inside the 4-word todisplay[] row */
+	    off1 = 2;
+	offs -= off1 * 32;
+	for (i = 0; i < toscr_nr_planes; i += 2) {
+	    uae_u32 t0 = todisplay[i][off1];
+	    uae_u32 t1 = todisplay[i][off1 + 1];
+	    uae_u64 t = (((uae_u64)t1) << 32) | t0;	/* 64-bit window over two adjacent words */
+	    outword[i] <<= nbits;
+	    outword[i] |= (t >> offs) & mask;
+	    aga_shift (todisplay[i], nbits, fm);
+	}
+    }
+    {	/* planes 1, 3, 5, ... use delay2 */
+	int offs = (16 << fm) - nbits + delay2;
+	int off1 = offs >> 5;
+	if (off1 == 3)
+	    off1 = 2;
+	offs -= off1 * 32;
+	for (i = 1; i < toscr_nr_planes; i += 2) {
+	    uae_u32 t0 = todisplay[i][off1];
+	    uae_u32 t1 = todisplay[i][off1 + 1];
+	    uae_u64 t = (((uae_u64)t1) << 32) | t0;
+	    outword[i] <<= nbits;
+	    outword[i] |= (t >> offs) & mask;
+	    aga_shift (todisplay[i], nbits, fm);
+	}
+    }
+}
+
+static void toscr_2_0 (int nbits) { toscr_3_ecs (nbits); }	/* fetch mode 0 */
+static void toscr_2_1 (int nbits) { toscr_3_aga (nbits, 1); }	/* fetch mode 1 */
+static void toscr_2_2 (int nbits) { toscr_3_aga (nbits, 2); }	/* fetch mode 2 */
+
+STATIC_INLINE void toscr_1 (int nbits, int fm)
+{   /* Shift NBITS bits into outword[]; once 32 bits are collected, store one word per plane in line_data.  */
+    switch (fm) {
+    case 0:
+	toscr_2_0 (nbits);
+	break;
+    case 1:
+	toscr_2_1 (nbits);
+	break;
+    case 2:
+	toscr_2_2 (nbits);
+	break;
+    }
+
+    out_nbits += nbits;
+    if (out_nbits == 32) {	/* a full 32-bit output word is ready */
+	int i;
+	uae_u8 *dataptr = line_data[next_lineno] + out_offs * 4;
+	/* Don't use toscr_nr_planes here; if the plane count drops during the
+	   line we still want the data to be correct for the full number of planes
+	   over the full width of the line.  */
+	for (i = 0; i < thisline_decision.nr_planes; i++) {
+	    uae_u32 *dataptr32 = (uae_u32 *)dataptr;
+	    if (*dataptr32 != outword[i])	/* stored data differs: mark the line changed */
+		thisline_changed = 1;
+	    *dataptr32 = outword[i];
+	    dataptr += MAX_WORDS_PER_LINE * 2;	/* step to the next plane's row */
+	}
+	out_offs++;
+	out_nbits = 0;
+    }
+}
+
+static void toscr_fm0 (int);
+static void toscr_fm1 (int);
+static void toscr_fm2 (int);
+
+STATIC_INLINE void toscr (int nbits, int fm)
+{   /* Dispatch to the out-of-line variant for fetch mode FM; other values do nothing.  */
+    switch (fm) {
+    case 0: toscr_fm0 (nbits); break;
+    case 1: toscr_fm1 (nbits); break;
+    case 2: toscr_fm2 (nbits); break;
+    }
+}
+
+STATIC_INLINE void toscr_0 (int nbits, int fm)
+{   /* Split NBITS so that no single toscr_1 call crosses a 32-bit output word.  */
+    int t;
+
+    if (nbits > 16) {	/* push the first 16 bits through toscr () */
+	toscr (16, fm);
+	nbits -= 16;
+    }
+
+    t = 32 - out_nbits;	/* room left in the current output word */
+    if (t < nbits) {
+	toscr_1 (t, fm);
+	nbits -= t;
+    }
+    toscr_1 (nbits, fm);
+}
+
+static void toscr_fm0 (int nbits) { toscr_0 (nbits, 0); }	/* toscr_0 with FM fixed at 0 */
+static void toscr_fm1 (int nbits) { toscr_0 (nbits, 1); }	/* toscr_0 with FM fixed at 1 */
+static void toscr_fm2 (int nbits) { toscr_0 (nbits, 2); }	/* toscr_0 with FM fixed at 2 */
+
+static int flush_plane_data (int fm)
+{
+    /* I adds up every bit count handed to toscr_1 (); it is returned scaled down by 1 + toscr_res.  */
+    int i = 0;
+
+    if (out_nbits <= 16) {
+	i += 16;
+	toscr_1 (16, fm);
+    }
+    if (out_nbits != 0) {	/* complete the partially filled output word */
+	i += 32 - out_nbits;
+	toscr_1 (32 - out_nbits, fm);
+    }
+    i += 32;	/* accounts for the two 16-bit pushes below */
+
+    toscr_1 (16, fm);
+    toscr_1 (16, fm);
+    return i >> (1 + toscr_res);
+}
+
+STATIC_INLINE void flush_display (int fm)
+{   /* Push the accumulated toscr_nbits out, but only once plfleft has been set; always reset the count.  */
+    if (toscr_nbits > 0 && thisline_decision.plfleft != -1)
+	toscr (toscr_nbits, fm);
+    toscr_nbits = 0;
+}
+
+/* Called when all planes have been fetched, i.e. when a new block
+   of data is available to be displayed.  The data in fetched[] is
+   moved into todisplay[].  The DMA argument is not used here.  */
+STATIC_INLINE void beginning_of_plane_block (int pos, int dma, int fm)
+{
+    int i;
+
+    flush_display (fm);	/* output what is still pending first */
+
+    if (fm == 0)
+	for (i = 0; i < MAX_PLANES; i++) 
+	    todisplay[i][0] |= fetched[i];	/* OR-ed in, not overwritten */
+    else
+	for (i = 0; i < MAX_PLANES; i++) {
+	    if (fm == 2)
+		todisplay[i][1] = fetched_aga1[i];
+	    todisplay[i][0] = fetched_aga0[i];
+	}
+
+    maybe_first_bpl1dat (pos);
+}
+
+#define SPEEDUP
+
+#ifdef SPEEDUP
+
+/* The usual inlining tricks - don't touch unless you know what you are doing.  */
+STATIC_INLINE void long_fetch_ecs (int plane, int nwords, int weird_number_of_bits, int dma)
+{   /* Bulk path: process NWORDS 16-bit words of PLANE, reading through a translated memory pointer.  */
+    uae_u16 *real_pt = (uae_u16 *)pfield_xlateptr (bplpt[plane] + bpl_off[plane], nwords * 2);
+    int delay = ((plane & 1) ? toscr_delay2 : toscr_delay1);
+    int tmp_nbits = out_nbits;	/* local copy; out_nbits itself is not written here */
+    uae_u32 shiftbuffer = todisplay[plane][0];
+    uae_u32 outval = outword[plane];
+    uae_u32 fetchval = fetched[plane];
+    uae_u32 *dataptr = (uae_u32 *)(line_data[next_lineno] + 2 * plane * MAX_WORDS_PER_LINE + 4 * out_offs);
+
+    if (dma)
+	bplpt[plane] += nwords * 2;	/* advanced even when the translation below failed */
+
+    if (real_pt == 0)
+	/* @@@ Don't do this, fall back on chipmem_wget instead.  */
+	return;
+
+    while (nwords > 0) {
+	int bits_left = 32 - tmp_nbits;
+	uae_u32 t;
+
+	shiftbuffer |= fetchval;
+
+	t = (shiftbuffer >> delay) & 0xFFFF;
+
+	if (weird_number_of_bits && bits_left < 16) {	/* the 16 new bits straddle two output words */
+	    outval <<= bits_left;
+	    outval |= t >> (16 - bits_left);
+	    thisline_changed |= *dataptr ^ outval;
+	    *dataptr++ = outval;
+
+	    outval = t;
+	    tmp_nbits = 16 - bits_left;
+	    shiftbuffer <<= 16;
+	} else {
+	    outval = (outval << 16) | t;
+	    shiftbuffer <<= 16;
+	    tmp_nbits += 16;
+	    if (tmp_nbits == 32) {
+		thisline_changed |= *dataptr ^ outval;	/* non-zero when the stored word differs */
+		*dataptr++ = outval;
+		tmp_nbits = 0;
+	    }
+	}
+	nwords--;
+	if (dma) {
+	    fetchval = do_get_mem_word (real_pt);
+	    real_pt++;
+	}
+    }
+    fetched[plane] = fetchval;	/* write the working copies back */
+    todisplay[plane][0] = shiftbuffer;
+    outword[plane] = outval;
+}
+
+STATIC_INLINE void long_fetch_aga (int plane, int nwords, int weird_number_of_bits, int fm, int dma)
+{   /* Bulk path for fetch modes 1 and 2: handles 1 << FM 16-bit words of PLANE per outer iteration.  */
+    uae_u32 *real_pt = (uae_u32 *)pfield_xlateptr (bplpt[plane] + bpl_off[plane], nwords * 2);
+    int delay = ((plane & 1) ? toscr_delay2 : toscr_delay1);
+    int tmp_nbits = out_nbits;	/* local copy; out_nbits itself is not written here */
+    uae_u32 *shiftbuffer = todisplay[plane];
+    uae_u32 outval = outword[plane];
+    uae_u32 fetchval0 = fetched_aga0[plane];
+    uae_u32 fetchval1 = fetched_aga1[plane];
+    uae_u32 *dataptr = (uae_u32 *)(line_data[next_lineno] + 2 * plane * MAX_WORDS_PER_LINE + 4 * out_offs);
+    int offs = (16 << fm) - 16 + delay;
+    int off1 = offs >> 5;	/* index of the 32-bit word the extraction starts in */
+    if (off1 == 3)	/* keeps off1 + 1 inside the 4-word todisplay[] row */
+	off1 = 2;
+    offs -= off1 * 32;
+
+    if (dma)
+	bplpt[plane] += nwords * 2;	/* advanced even when the translation below failed */
+
+    if (real_pt == 0)
+	/* @@@ Don't do this, fall back on chipmem_wget instead.  */
+	return;
+
+    while (nwords > 0) {
+	int i;
+
+	shiftbuffer[0] = fetchval0;
+	if (fm == 2)
+	    shiftbuffer[1] = fetchval1;
+
+	for (i = 0; i < (1 << fm); i++) {	/* one pass per 16 output bits */
+	    int bits_left = 32 - tmp_nbits;
+
+	    uae_u32 t0 = shiftbuffer[off1];
+	    uae_u32 t1 = shiftbuffer[off1 + 1];
+	    uae_u64 t = (((uae_u64)t1) << 32) | t0;	/* 64-bit window over two adjacent words */
+
+	    t0 = (t >> offs) & 0xFFFF;
+
+	    if (weird_number_of_bits && bits_left < 16) {	/* the 16 new bits straddle two output words */
+		outval <<= bits_left;
+		outval |= t0 >> (16 - bits_left);
+
+		thisline_changed |= *dataptr ^ outval;
+		*dataptr++ = outval;
+
+		outval = t0;
+		tmp_nbits = 16 - bits_left;
+		aga_shift (shiftbuffer, 16, fm);
+	    } else {
+		outval = (outval << 16) | t0;
+		aga_shift (shiftbuffer, 16, fm);
+		tmp_nbits += 16;
+		if (tmp_nbits == 32) {
+		    thisline_changed |= *dataptr ^ outval;	/* non-zero when the stored word differs */
+		    *dataptr++ = outval;
+		    tmp_nbits = 0;
+		}
+	    }
+	}
+
+	nwords -= 1 << fm;
+
+	if (dma) {
+	    if (fm == 1)
+		fetchval0 = do_get_mem_long (real_pt);
+	    else {	/* any FM other than 1 is handled as fetch mode 2 */
+		fetchval1 = do_get_mem_long (real_pt);
+		fetchval0 = do_get_mem_long (real_pt + 1);
+	    }
+	    real_pt += fm;	/* FM longs were consumed */
+	}
+    }
+    fetched_aga0[plane] = fetchval0;	/* write the working copies back */
+    fetched_aga1[plane] = fetchval1;
+    outword[plane] = outval;
+}
+/* Wrappers that fix weird_number_of_bits (and FM for AGA) to constants.  The first argument is a bitplane index - do_long_fetch passes its plane counter - not a horizontal position. */
+static void long_fetch_ecs_0 (int plane, int nwords, int dma) { long_fetch_ecs (plane, nwords, 0, dma); }
+static void long_fetch_ecs_1 (int plane, int nwords, int dma) { long_fetch_ecs (plane, nwords, 1, dma); }
+static void long_fetch_aga_1_0 (int plane, int nwords, int dma) { long_fetch_aga (plane, nwords, 0, 1, dma); }
+static void long_fetch_aga_1_1 (int plane, int nwords, int dma) { long_fetch_aga (plane, nwords, 1, 1, dma); }
+static void long_fetch_aga_2_0 (int plane, int nwords, int dma) { long_fetch_aga (plane, nwords, 0, 2, dma); }
+static void long_fetch_aga_2_1 (int plane, int nwords, int dma) { long_fetch_aga (plane, nwords, 1, 2, dma); }
+/* Fetch NWORDS words for each of the toscr_nr_planes planes with the helper matching FM, then advance out_offs/out_nbits.  HPOS is accepted but not used. */
+static void do_long_fetch (int hpos, int nwords, int dma, int fm)
+{
+    /* The _1 helpers are used when out_nbits is not a multiple of 16, the _0 helpers otherwise. */
+    int i;
+
+    flush_display (fm);
+    switch (fm) {
+    case 0:
+	if (out_nbits & 15) {
+	    for (i = 0; i < toscr_nr_planes; i++)
+		long_fetch_ecs_1 (i, nwords, dma);
+	} else {
+	    for (i = 0; i < toscr_nr_planes; i++)
+		long_fetch_ecs_0 (i, nwords, dma);
+	}
+	break;
+    case 1:
+	if (out_nbits & 15) {
+	    for (i = 0; i < toscr_nr_planes; i++)
+		long_fetch_aga_1_1 (i, nwords, dma);
+	} else {
+	    for (i = 0; i < toscr_nr_planes; i++)
+		long_fetch_aga_1_0 (i, nwords, dma);
+	}
+	break;
+    case 2:
+	if (out_nbits & 15) {
+	    for (i = 0; i < toscr_nr_planes; i++)
+		long_fetch_aga_2_1 (i, nwords, dma);
+	} else {
+	    for (i = 0; i < toscr_nr_planes; i++)
+		long_fetch_aga_2_0 (i, nwords, dma);
+	}
+	break;
+    }
+
+    /* Account for the emitted bits: whole 32-bit words go to out_offs, the remainder stays in out_nbits. */
+    out_nbits += nwords * 16;
+    out_offs += out_nbits >> 5;
+    out_nbits &= 31;
+
+    if (dma && toscr_nr_planes > 0)
+	fetch_state = fetch_was_plane0;
+}
+
+#endif
+
+/* Make sure a fetch that goes beyond maxhpos is finished: mark fetching done and, if a playfield was started, close it. */
+static void finish_final_fetch (int i, int fm)
+{
+    passed_plfstop = 3;
+    if (thisline_decision.plfleft == -1)
+	return;		/* no playfield start was recorded for this line */
+
+    /* The value returned by flush_plane_data is added to I to give the right edge. */
+    thisline_decision.plfright = i + flush_plane_data (fm);
+    thisline_decision.plflinelen = out_offs;
+    thisline_decision.bplres = toscr_res;
+    finish_playfield_line ();
+}
+/* Emulate one fetch cycle at position I.  Returns 1 if the line's fetch was finished here (finish_final_fetch was called), 0 otherwise. */
+STATIC_INLINE int one_fetch_cycle_0 (int i, int ddfstop_to_test, int dma, int fm)
+{
+    if (! passed_plfstop && i == ddfstop_to_test)
+	passed_plfstop = 1;
+
+    /* At a fetch-unit boundary: 2 means the final unit is complete; any other nonzero value is stepped up by one. */
+    if ((fetch_cycle & fetchunit_mask) == 0) {
+	if (passed_plfstop == 2) {
+	    finish_final_fetch (i, fm);
+	    return 1;
+	}
+	if (passed_plfstop)
+	    passed_plfstop++;
+    }
+    if (dma) {
+	/* fetchstart_mask can be larger than fm_maxplane if FMODE > 0.  This means
+	   that the remaining cycles are idle; we'll fall through the whole switch
+	   without doing anything.  */
+	int cycle_start = fetch_cycle & fetchstart_mask;
+	switch (fm_maxplane) {
+	case 8:
+	    switch (cycle_start) {
+	    case 0: fetch (7, fm); break;
+	    case 1: fetch (3, fm); break;
+	    case 2: fetch (5, fm); break;
+	    case 3: fetch (1, fm); break;
+	    case 4: fetch (6, fm); break;
+	    case 5: fetch (2, fm); break;
+	    case 6: fetch (4, fm); break;
+	    case 7: fetch (0, fm); break;
+	    }
+	    break;
+	case 4:
+	    switch (cycle_start) {
+	    case 0: fetch (3, fm); break;
+	    case 1: fetch (1, fm); break;
+	    case 2: fetch (2, fm); break;
+	    case 3: fetch (0, fm); break;
+	    }
+	    break;
+	case 2:
+	    switch (cycle_start) {
+	    case 0: fetch (1, fm); break;
+	    case 1: fetch (0, fm); break;
+	    }
+	    break;
+	}
+    }
+    fetch_cycle++;
+    toscr_nbits += 2 << toscr_res;	/* bits produced per cycle depend on the resolution */
+
+    if (toscr_nbits == 16)
+	flush_display (fm);
+    if (toscr_nbits > 16)
+	abort ();	/* more than 16 pending bits is treated as an internal error */
+
+    return 0;
+}
+/* Out-of-line copies of one_fetch_cycle_0, one per constant fetch mode (0, 1, 2). */
+static int one_fetch_cycle_fm0 (int i, int ddfstop_to_test, int dma) { return one_fetch_cycle_0 (i, ddfstop_to_test, dma, 0); }
+static int one_fetch_cycle_fm1 (int i, int ddfstop_to_test, int dma) { return one_fetch_cycle_0 (i, ddfstop_to_test, dma, 1); }
+static int one_fetch_cycle_fm2 (int i, int ddfstop_to_test, int dma) { return one_fetch_cycle_0 (i, ddfstop_to_test, dma, 2); }
+/* Dispatch one fetch cycle to the wrapper for fetch mode FM; aborts if FM is not 0, 1 or 2. */
+STATIC_INLINE int one_fetch_cycle (int i, int ddfstop_to_test, int dma, int fm)
+{
+    switch (fm) {
+    case 0: return one_fetch_cycle_fm0 (i, ddfstop_to_test, dma);
+    case 1: return one_fetch_cycle_fm1 (i, ddfstop_to_test, dma);
+    case 2: return one_fetch_cycle_fm2 (i, ddfstop_to_test, dma);
+    default: abort ();
+    }
+}
+/* Run the fetch emulation from last_fetch_hpos up to (not including) UNTIL for fetch mode FM.  Does nothing if framecnt != 0 or the line's fetch is already finished. */
+STATIC_INLINE void update_fetch (int until, int fm)
+{
+    int pos;
+    int dma = dmaen (DMA_BITPLANE);
+
+    int ddfstop_to_test;
+
+    if (framecnt != 0 || passed_plfstop == 3)
+	return;
+
+    /* We need an explicit test against HARD_DDF_STOP here to guard against
+       programs that move the DDFSTOP before our current position before we
+       reach it.  */
+    ddfstop_to_test = HARD_DDF_STOP;
+    if (ddfstop >= last_fetch_hpos && ddfstop < HARD_DDF_STOP)
+	ddfstop_to_test = ddfstop;
+
+    compute_toscr_delay (last_fetch_hpos);
+    update_toscr_planes ();
+
+    pos = last_fetch_hpos;
+    cycle_diagram_shift = (last_fetch_hpos - fetch_cycle) & fetchstart_mask;
+
+    /* First, a loop that prepares us for the speedup code.  We want to enter
+       the SPEEDUP case with fetch_state == fetch_was_plane0, and then unroll
+       whole blocks, so that we end on the same fetch_state again.  */
+    for (; ; pos++) {
+	if (pos == until) {
+	    if (until >= maxhpos && passed_plfstop == 2) {
+		finish_final_fetch (pos, fm);
+		return;
+	    }
+	    flush_display (fm);
+	    return;
+	}
+
+	if (fetch_state == fetch_was_plane0)
+	    break;
+
+	fetch_state = fetch_started;
+	if (one_fetch_cycle (pos, ddfstop_to_test, dma, fm))
+	    return;
+    }
+
+#ifdef SPEEDUP
+    /* Unrolled version of the for loop below.  */
+    if (! passed_plfstop
+	&& dma
+	&& (fetch_cycle & fetchstart_mask) == (fm_maxplane & fetchstart_mask)
+# if 0
+	/* @@@ We handle this case, but the code would be simpler if we
+	 * disallowed it - it may even be possible to guarantee that
+	 * this condition never is false.  Later.  */
+	&& (out_nbits & 15) == 0
+# endif
+	&& toscr_nr_planes == thisline_decision.nr_planes)
+    {
+	int offs = (pos - fetch_cycle) & fetchunit_mask;
+	int ddf2 = ((ddfstop_to_test - offs + fetchunit - 1) & ~fetchunit_mask) + offs;	/* ddfstop_to_test rounded up to a fetch-unit boundary, phase OFFS */
+	int ddf3 = ddf2 + fetchunit;
+	int stop = until < ddf2 ? until : until < ddf3 ? ddf2 : ddf3;
+	int count;
+
+	count = stop - pos;
+
+	if (count >= fetchstart) {
+	    count &= ~fetchstart_mask;	/* only whole fetchstart-sized blocks are unrolled */
+
+	    if (thisline_decision.plfleft == -1) {
+		compute_delay_offset (pos);
+		compute_toscr_delay_1 ();
+	    }
+	    do_long_fetch (pos, count >> (3 - toscr_res), dma, fm);
+
+	    /* This must come _after_ do_long_fetch so as not to confuse flush_display
+	       into thinking the first fetch has produced any output worth emitting to
+	       the screen.  But the calculation of delay_offset must happen _before_.  */
+	    maybe_first_bpl1dat (pos);
+
+	    if (pos <= ddfstop_to_test && pos + count > ddfstop_to_test)
+		passed_plfstop = 1;
+	    if (pos <= ddfstop_to_test && pos + count > ddf2)
+		passed_plfstop = 2;
+	    pos += count;
+	    fetch_cycle += count;
+	}
+    }
+#endif
+    /* Cycle-by-cycle path for whatever the unrolled code above did not cover. */
+    for (; pos < until; pos++) {
+	if (fetch_state == fetch_was_plane0)
+	    beginning_of_plane_block (pos, dma, fm);
+	fetch_state = fetch_started;
+
+	if (one_fetch_cycle (pos, ddfstop_to_test, dma, fm))
+	    return;
+    }
+    if (until >= maxhpos && passed_plfstop == 2) {
+	finish_final_fetch (pos, fm);
+	return;
+    }
+    flush_display (fm);
+}
+/* update_fetch with the fetch mode fixed to 0, 1 or 2; selected by decide_fetch. */
+static void update_fetch_0 (int hpos) { update_fetch (hpos, 0); }
+static void update_fetch_1 (int hpos) { update_fetch (hpos, 1); }
+static void update_fetch_2 (int hpos) { update_fetch (hpos, 2); }
+/* Bring the fetch emulation up to HPOS if fetching has started and HPOS is past last_fetch_hpos; always records HPOS as last_fetch_hpos. */
+STATIC_INLINE void decide_fetch (int hpos)
+{
+    if (fetch_state != fetch_not_started && hpos > last_fetch_hpos) {
+	switch (fetchmode) {
+	case 0: update_fetch_0 (hpos); break;
+	case 1: update_fetch_1 (hpos); break;
+	case 2: update_fetch_2 (hpos); break;
+	default: abort ();
+	}
+    }
+    last_fetch_hpos = hpos;
+}
+
+/* This function is responsible for turning on datafetch if necessary: it starts the fetch state once HPOS has crossed plfstrt with DMA on and the vertical DIW open.  */
+STATIC_INLINE void decide_line (int hpos)
+{
+    if (hpos <= last_decide_line_hpos)
+	return;
+    if (fetch_state != fetch_not_started)
+	return;
+
+    /* Test if we passed the start of the DDF window.  */
+    if (last_decide_line_hpos < plfstrt && hpos >= plfstrt) {
+	/* First, take care of the vertical DIW.  Surprisingly enough, this seems to be
+	   correct here - putting this into decide_diw() results in garbage.  */
+	if (diwstate == DIW_waiting_start && vpos == plffirstline) {
+	    diwstate = DIW_waiting_stop;
+	}
+	if (diwstate == DIW_waiting_stop && vpos == plflastline) {
+	    diwstate = DIW_waiting_start;
+	}
+
+	/* If DMA isn't on by the time we reach plfstrt, then there's no
+	   bitplane DMA at all for the whole line.  */
+	if (dmaen (DMA_BITPLANE)
+	    && diwstate == DIW_waiting_stop)
+	{
+	    /* Start fetching at plfstrt with empty output counters. */
+	    fetch_state = fetch_started;
+	    fetch_cycle = 0;
+	    last_fetch_hpos = plfstrt;
+	    out_nbits = 0;
+	    out_offs = 0;
+	    toscr_nbits = 0;
+
+	    compute_toscr_delay (last_fetch_hpos);
+
+	    /* If someone already wrote BPL1DAT, clear the area between that point and
+	       the real fetch start.  */
+	    if (framecnt == 0) {
+		if (thisline_decision.plfleft != -1) {
+		    out_nbits = (plfstrt - thisline_decision.plfleft) << (1 + toscr_res);
+		    out_offs = out_nbits >> 5;
+		    out_nbits &= 31;
+		}
+		update_toscr_planes ();
+	    }
+	    estimate_last_fetch_cycle (plfstrt);
+	    last_decide_line_hpos = hpos;
+	    do_sprites (plfstrt);
+	    return;
+	}
+    }
+
+    if (last_decide_line_hpos < 0x34)	/* NOTE(review): meaning of the 0x34 limit is not stated here - confirm */
+	do_sprites (hpos);
+
+    last_decide_line_hpos = hpos;
+}
+
+/* Called when a color is about to be changed (write to a color register),
+ * but the new color has not been entered into the table yet.  A REGNO of -1 with nonzero VALUE only flags HAM for the line. */
+static void record_color_change (int hpos, int regno, unsigned long value)
+{
+    if (regno == -1 && value) {
+	thisline_decision.ham_seen = 1;
+	if (hpos < 0x18)
+	    thisline_decision.ham_at_start = 1;
+    }
+
+    /* Early positions don't appear on-screen. */
+    if (framecnt != 0 || vpos < minfirstline || hpos < 0x18
+	/*|| currprefs.emul_accuracy == 0*/)
+	return;
+
+    decide_diw (hpos);
+    decide_line (hpos);
+
+    if (thisline_decision.ctable == -1)
+	remember_ctable ();
+
+#ifdef OS_WITHOUT_MEMORY_MANAGEMENT
+    /* Table full: count the overflow in delta_color_change and drop this entry. */
+    if (next_color_change >= max_color_change) {
+	++delta_color_change;
+	return;
+    }
+#endif
+    /* NOTE(review): without the #ifdef above there is no bounds check here - confirm the table is sized elsewhere. */
+    curr_color_changes[next_color_change].linepos = hpos;
+    curr_color_changes[next_color_change].regno = regno;
+    curr_color_changes[next_color_change++].value = value;
+}
+
+typedef int sprbuf_res_t, cclockres_t, hwres_t, bplres_t;	/* all plain int; presumably the names only tag which coordinate resolution a value uses - confirm */
+/* Set bit 0 of clxdat if, in some 32-bit chunk of the line, every enabled bitplane equals its clxcon_bpl_match bit at the same pixel. */
+static void do_playfield_collisions (void)
+{
+    uae_u8 *ld = line_data[next_lineno];
+    int i;
+
+    if (clxcon_bpl_enable == 0) {	/* no plane takes part: always report a collision */
+	clxdat |= 1;
+	return;
+    }
+    for (i = thisline_decision.plfleft; i < thisline_decision.plfright; i += 2) {
+	int j;
+	uae_u32 total = 0xFFFFFFFF;	/* one bit per pixel; stays set while every plane matches */
+	for (j = 0; j < 8; j++) {
+	    uae_u32 t = 0;	/* planes beyond nr_planes count as all-zero data, as in do_sprite_collisions */
+	    if ((clxcon_bpl_enable & (1 << j)) == 0)
+		t = 0xFFFFFFFF;	/* disabled planes never veto a match */
+	    else {
+		if (j < thisline_decision.nr_planes)
+		    t = *(uae_u32 *)(ld + 2 * i + 2 * j * MAX_WORDS_PER_LINE);
+		t ^= ((clxcon_bpl_match >> j) & 1) - 1;	/* match bit 1 keeps the data, match bit 0 inverts it */
+	    }
+	    total &= t;
+	}
+	if (total)
+	    clxdat |= 1;
+    }
+}
+
+/* Sprite-to-sprite collisions are taken care of in record_sprite.  This one does
+   playfield/sprite collisions for the sprite entries recorded on this line.
+   That's the theory.  In practice this doesn't work yet.  I also suspect this code
+   is way too slow.  */
+static void do_sprite_collisions (void)
+{
+    int nr_sprites = curr_drawinfo[next_lineno].nr_sprites;
+    int first = curr_drawinfo[next_lineno].first_sprite_entry;
+    int i;
+    unsigned int collision_mask = clxmask[clxcon >> 12];
+    int bplres = GET_RES (bplcon0);
+    hwres_t ddf_left = thisline_decision.plfleft * 2 << bplres;
+    hwres_t hw_diwlast = coord_window_to_diw_x (thisline_decision.diwlastword);
+    hwres_t hw_diwfirst = coord_window_to_diw_x (thisline_decision.diwfirstword);
+
+    if (clxcon_bpl_enable == 0) {
+	clxdat |= 0x1FE;	/* no plane enabled: set bits 1-8 unconditionally */
+	return;
+    }
+
+    for (i = 0; i < nr_sprites; i++) {
+	struct sprite_entry *e = curr_sprite_entries + first + i;
+	sprbuf_res_t j;
+	sprbuf_res_t minpos = e->pos;
+	sprbuf_res_t maxpos = e->max;
+	hwres_t minp1 = minpos >> sprite_buffer_res;
+	hwres_t maxp1 = maxpos >> sprite_buffer_res;
+
+	/* NOTE(review): in each pair below the second test overwrites the first clamp instead of keeping the tighter one - confirm intended. */
+	if (maxp1 > hw_diwlast)
+	    maxpos = hw_diwlast << sprite_buffer_res;
+	if (maxp1 > thisline_decision.plfright * 2)
+	    maxpos = thisline_decision.plfright * 2 << sprite_buffer_res;
+	if (minp1 < hw_diwfirst)
+	    minpos = hw_diwfirst << sprite_buffer_res;
+	if (minp1 < thisline_decision.plfleft * 2)
+	    minpos = thisline_decision.plfleft * 2 << sprite_buffer_res;
+
+	for (j = minpos; j < maxpos; j++) {
+	    int sprpix = spixels[e->first_pixel + j - e->pos] & collision_mask;
+	    int k;
+	    int offs;
+
+	    if (sprpix == 0)
+		continue;
+
+	    offs = ((j << bplres) >> sprite_buffer_res) - ddf_left;	/* bit offset into the plane data for this pixel */
+	    sprpix = sprite_ab_merge[sprpix & 255] | (sprite_ab_merge[sprpix >> 8] << 2);
+	    sprpix <<= 1;
+
+	    /* Loop over number of playfields.  */
+	    for (k = 0; k < 2; k++) {
+		int l;
+		int match = 1;
+		int planes = ((currprefs.chipset_mask & CSMASK_AGA) ? 8 : 6);
+
+		/* Planes k, k+2, ... : every enabled one must equal its match bit. */
+		for (l = k; match && l < planes; l += 2) {
+		    if (clxcon_bpl_enable & (1 << l)) {
+			int t = 0;
+			if (l < thisline_decision.nr_planes) {
+			    uae_u32 *ldata = (uae_u32 *)(line_data[next_lineno] + 2 * l * MAX_WORDS_PER_LINE);
+			    uae_u32 word = ldata[offs >> 5];
+			    t = (word >> (31 - (offs & 31))) & 1;
+			}
+			if (t != ((clxcon_bpl_match >> l) & 1))
+			    match = 0;
+		    }
+		}
+		if (match)
+		    clxdat |= sprpix;
+		sprpix <<= 4;	/* move on to the bit group used for the next value of k */
+	    }
+	}
+    }
+}
+/* Set sprres from bits 6-7 of bplcon3. */
+static void expand_sprres (void)
+{
+    int sel = (bplcon3 >> 6) & 3;
+
+    /* 1..3 select a resolution explicitly; 0 falls back to a default
+       that depends on the chipset and on the resolution in bplcon0.  */
+    if (sel == 3) {
+	sprres = RES_SUPERHIRES;
+    } else if (sel == 2) {
+	sprres = RES_HIRES;
+    } else if (sel == 1) {
+	sprres = RES_LORES;
+    } else {
+	/* ECS defaults (LORES,HIRES=140ns,SHRES=70ns) */
+	int ecs_shres = (currprefs.chipset_mask & CSMASK_ECS_DENISE)
+	    && GET_RES (bplcon0) == RES_SUPERHIRES;
+
+	sprres = ecs_shres ? RES_HIRES : RES_LORES;
+    }
+}
+/* OR the 2-bit packets of DATAB, shifted to sprite NUM's bit position, into the entries at BUF; with DO_COLLISIONS also accumulate clxdat bits from the merged pixels. */
+STATIC_INLINE void record_sprite_1 (uae_u16 *buf, uae_u32 datab, int num, int dbl,
+				    unsigned int mask, int do_collisions, uae_u32 collision_mask)
+{
+    int j = 0;
+    while (datab) {	/* stops as soon as only zero packets remain */
+	unsigned int tmp = *buf;
+	unsigned int col = (datab & 3) << (2 * num);
+	tmp |= col;
+	if ((j & mask) == 0)	/* with MASK == 1 only every other packet is stored here */
+	    *buf++ = tmp;
+	if (dbl)		/* nonzero DBL stores the merged value once more */
+	    *buf++ = tmp;
+	j++;
+	datab >>= 2;
+	if (do_collisions) {
+	    tmp &= collision_mask;
+	    if (tmp) {
+		unsigned int shrunk_tmp = sprite_ab_merge[tmp & 255] | (sprite_ab_merge[tmp >> 8] << 2);
+		clxdat |= sprclx[shrunk_tmp];
+	    }
+	}
+    }
+}
+
+/* DATAB contains the sprite data; 16 pixels in two-bit packets.  Bits 0/1
+   determine the color of the leftmost pixel, bits 2/3 the color of the next
+   etc.
+   This function assumes that for all sprites in a given line, SPRXP either
+   stays equal or increases between successive calls.
+
+   The data is recorded either in lores pixels (if ECS), or in hires pixels
+   (if AGA).  No support for SHRES sprites.  */
+/* NOTE(review): the LINE parameter and the local stbuf are not used in the body. */
+static void record_sprite (int line, int num, int sprxp, uae_u16 *data, uae_u16 *datb, unsigned int ctl)
+{
+    struct sprite_entry *e = curr_sprite_entries + next_sprite_entry;
+    int i;
+    int word_offs;
+    uae_u16 *buf;
+    uae_u32 collision_mask;
+    int width = sprite_width;
+    int dbl = 0;
+    unsigned int mask = 0;
+
+    if (sprres != RES_LORES)
+	thisline_decision.any_hires_sprites = 1;
+
+    if (currprefs.chipset_mask & CSMASK_AGA) {
+	width = (width << 1) >> sprres;
+	dbl = sprite_buffer_res - sprres;
+	mask = sprres == RES_SUPERHIRES ? 1 : 0;
+    }
+
+    /* Try to coalesce entries if they aren't too far apart.  */
+    if (! next_sprite_forced && e[-1].max + 16 >= sprxp) {
+	e--;
+    } else {
+	next_sprite_entry++;
+	e->pos = sprxp;
+	e->has_attached = 0;
+    }
+
+    if (sprxp < e->pos)
+	abort ();	/* violates the ascending-SPRXP assumption stated above */
+
+    e->max = sprxp + width;
+    e[1].first_pixel = e->first_pixel + ((e->max - e->pos + 3) & ~3);	/* next entry's pixels start after this one's, rounded up to a multiple of 4 */
+    next_sprite_forced = 0;
+
+    collision_mask = clxmask[clxcon >> 12];
+    word_offs = e->first_pixel + sprxp - e->pos;
+
+    for (i = 0; i < sprite_width; i += 16) {
+	unsigned int da = *data;
+	unsigned int db = *datb;
+	uae_u32 datab = ((sprtaba[da & 0xFF] << 16) | sprtaba[da >> 8]
+			 | (sprtabb[db & 0xFF] << 16) | sprtabb[db >> 8]);
+
+	buf = spixels + word_offs + (i << dbl);
+	if (currprefs.collision_level > 0 && collision_mask)
+	    record_sprite_1 (buf, datab, num, dbl, mask, 1, collision_mask);
+	else
+	    record_sprite_1 (buf, datab, num, dbl, mask, 0, collision_mask);
+	data++;
+	datb++;
+    }
+
+    /* We have 8 bits per pixel in spixstate, two for every sprite pair.  The
+       low order bit records whether the attach bit was set for this pair.  */
+
+    if (ctl & (num << 7) & 0x80) {	/* true only for odd NUM with bit 7 of CTL set */
+	uae_u32 state = 0x01010101 << (num - 1);
+	uae_u32 *stbuf = spixstate.words + (word_offs >> 2);
+	uae_u8 *stb1 = spixstate.bytes + word_offs;	
+	for (i = 0; i < width; i += 8) {
+	    stb1[0] |= state;
+	    stb1[1] |= state;
+	    stb1[2] |= state;
+	    stb1[3] |= state;
+	    stb1[4] |= state;
+	    stb1[5] |= state;
+	    stb1[6] |= state;
+	    stb1[7] |= state;
+	    stb1 += 8;
+	}
+	e->has_attached = 1;
+    }
+}
+/* Record, in ascending X order, every armed sprite whose hardware X position lies after last_sprite_point and at or before HPOS * 2. */
+static void decide_sprites (int hpos)
+{
+    int nrs[MAX_SPRITES], posns[MAX_SPRITES];
+    int count, i;
+    int point = hpos * 2;
+    int width = sprite_width;
+    int window_width = (width << lores_shift) >> sprres;
+
+    if (framecnt != 0 || hpos < 0x14 || nr_armed == 0 || point == last_sprite_point)
+	return;
+
+    decide_diw (hpos);
+    decide_line (hpos);
+
+#if 0
+    /* This tries to detect whether the line is border, but that doesn't work, it's too early.  */
+    if (thisline_decision.plfleft == -1)
+	return;
+#endif
+    count = 0;
+    for (i = 0; i < MAX_SPRITES; i++) {
+	int sprxp = spr[i].xpos;
+	int hw_xp = (sprxp >> sprite_buffer_res);
+	int window_xp = coord_hw_to_window_x (hw_xp) + (DIW_DDF_OFFSET << lores_shift);
+	int j, bestp;
+
+	if (! spr[i].armed || sprxp < 0 || hw_xp <= last_sprite_point || hw_xp > point)
+	    continue;
+	/* Skip sprites that lie entirely outside a known display window edge. */
+	if ((thisline_decision.diwfirstword >= 0 && window_xp + window_width < thisline_decision.diwfirstword)
+	    || (thisline_decision.diwlastword >= 0 && window_xp > thisline_decision.diwlastword))
+	    continue;
+
+	/* Sort the sprites in order of ascending X position before recording them.  */
+	for (bestp = 0; bestp < count; bestp++) {
+	    if (posns[bestp] > sprxp)
+		break;
+	    if (posns[bestp] == sprxp && nrs[bestp] < i)
+		break;
+	}
+	/* Shift the tail up by one and insert at BESTP. */
+	for (j = count; j > bestp; j--) {
+	    posns[j] = posns[j-1];
+	    nrs[j] = nrs[j-1];
+	}
+	posns[j] = sprxp;
+	nrs[j] = i;
+	count++;
+    }
+    for (i = 0; i < count; i++) {
+	int nr = nrs[i];    
+	record_sprite (next_lineno, nr, spr[nr].xpos, sprdata[nr], sprdatb[nr], sprctl[nr]);
+    }
+    last_sprite_point = point;
+}
+/* Return 1 if the sprite entries or sprite pixel data described by DIP differ from those described by DIP_OLD, 0 if they are the same. */
+STATIC_INLINE int sprites_differ (struct draw_info *dip, struct draw_info *dip_old)
+{
+    struct sprite_entry *this_first = curr_sprite_entries + dip->first_sprite_entry;
+    struct sprite_entry *this_last = curr_sprite_entries + dip->last_sprite_entry;
+    struct sprite_entry *prev_first = prev_sprite_entries + dip_old->first_sprite_entry;
+    int npixels;
+    int i;
+
+    if (dip->nr_sprites != dip_old->nr_sprites)
+	return 1;
+    
+    if (dip->nr_sprites == 0)
+	return 0;
+
+    /* Compare the per-entry geometry and attach flags first. */
+    for (i = 0; i < dip->nr_sprites; i++)
+	if (this_first[i].pos != prev_first[i].pos
+	    || this_first[i].max != prev_first[i].max
+	    || this_first[i].has_attached != prev_first[i].has_attached)
+	    return 1;
+
+    npixels = this_last->first_pixel + (this_last->max - this_last->pos) - this_first->first_pixel;
+    if (memcmp (spixels + this_first->first_pixel, spixels + prev_first->first_pixel,	/* NOTE(review): both sides index the same spixels array - confirm */
+		npixels * sizeof (uae_u16)) != 0)
+	return 1;
+    if (memcmp (spixstate.bytes + this_first->first_pixel, spixstate.bytes + prev_first->first_pixel, npixels) != 0)
+	return 1;
+    return 0;
+}
+/* Return 1 if the color changes recorded for DIP differ from those recorded in prev_color_changes for DIP_OLD, else 0. */
+STATIC_INLINE int color_changes_differ (struct draw_info *dip, struct draw_info *dip_old)
+{
+    int count = dip->nr_color_changes;
+
+    if (count != dip_old->nr_color_changes)
+	return 1;
+    if (count == 0)
+	return 0;
+    /* Same non-zero count: compare the two runs of entries byte for byte. */
+    return memcmp (curr_color_changes + dip->first_color_change,
+		   prev_color_changes + dip_old->first_color_change,
+		   count * sizeof *curr_color_changes) != 0;
+}
+
+/* End of a horizontal scan line. Finish off all decisions that were not
+ * made yet, then store them in line_decisions[next_lineno] if anything changed. */
+static void finish_decisions (void)
+{
+    struct draw_info *dip;
+    struct draw_info *dip_old;
+    struct decision *dp;
+    int changed;
+    int hpos = current_hpos ();
+
+    if (framecnt != 0)
+	return;
+
+    decide_diw (hpos);
+    decide_line (hpos);
+    decide_fetch (hpos);
+
+    /* A playfield start was recorded but no fetch ever ran: give the line an empty playfield. */
+    if (thisline_decision.plfleft != -1 && thisline_decision.plflinelen == -1) {
+	if (fetch_state != fetch_not_started)
+	    abort ();
+	thisline_decision.plfright = thisline_decision.plfleft;
+	thisline_decision.plflinelen = 0;
+	thisline_decision.bplres = RES_LORES;
+    }
+
+    /* Large DIWSTOP values can cause the stop position never to be
+     * reached, so the state machine always stays in the same state and
+     * there's a more-or-less full-screen DIW. */
+    if (hdiwstate == DIW_waiting_stop || thisline_decision.diwlastword > max_diwlastword)
+	thisline_decision.diwlastword = max_diwlastword;
+
+    if (thisline_decision.diwfirstword != line_decisions[next_lineno].diwfirstword)
+	MARK_LINE_CHANGED;
+    if (thisline_decision.diwlastword != line_decisions[next_lineno].diwlastword)
+	MARK_LINE_CHANGED;
+
+    dip = curr_drawinfo + next_lineno;
+    dip_old = prev_drawinfo + next_lineno;
+    dp = line_decisions + next_lineno;
+    changed = thisline_changed;
+
+    if (thisline_decision.plfleft != -1) {
+	record_diw_line (thisline_decision.diwfirstword, thisline_decision.diwlastword);
+
+	decide_sprites (hpos);
+    }
+
+    dip->last_sprite_entry = next_sprite_entry;
+    dip->last_color_change = next_color_change;
+
+    if (thisline_decision.ctable == -1) {
+	if (thisline_decision.plfleft == -1)
+	    remember_ctable_for_border ();
+	else
+	    remember_ctable ();
+    }
+
+    dip->nr_color_changes = next_color_change - dip->first_color_change;
+    dip->nr_sprites = next_sprite_entry - dip->first_sprite_entry;
+
+    /* The comparisons below are skipped once a change has already been detected. */
+    if (thisline_decision.plfleft != line_decisions[next_lineno].plfleft)
+	changed = 1;
+    if (! changed && color_changes_differ (dip, dip_old))
+	changed = 1;
+    if (!changed && thisline_decision.plfleft != -1 && sprites_differ (dip, dip_old))
+	changed = 1;
+
+    if (changed) {
+	thisline_changed = 1;
+	*dp = thisline_decision;
+    } else
+	/* The only one that may differ: */
+	dp->ctable = thisline_decision.ctable;
+}
+
+/* Set the state of all decisions to "undecided" for a new scanline.  Does nothing when framecnt != 0. */
+static void reset_decisions (void)
+{
+    if (framecnt != 0)
+	return;
+
+    thisline_decision.any_hires_sprites = 0;
+    thisline_decision.nr_planes = 0;
+
+    thisline_decision.plfleft = -1;	/* -1 is tested elsewhere as "no playfield start recorded" */
+    thisline_decision.plflinelen = -1;
+    thisline_decision.ham_seen = !! (bplcon0 & 0x800);
+    thisline_decision.ham_at_start = !! (bplcon0 & 0x800);
+
+    /* decided_res shouldn't be touched before it's initialized by decide_line(). */
+    thisline_decision.diwfirstword = -1;
+    thisline_decision.diwlastword = -2;
+    if (hdiwstate == DIW_waiting_stop) {
+	/* The horizontal window is already open at the start of the line. */
+	thisline_decision.diwfirstword = 0;
+	if (thisline_decision.diwfirstword != line_decisions[next_lineno].diwfirstword)
+	    MARK_LINE_CHANGED;
+    }
+    thisline_decision.ctable = -1;
+
+    thisline_changed = 0;
+    curr_drawinfo[next_lineno].first_color_change = next_color_change;
+    curr_drawinfo[next_lineno].first_sprite_entry = next_sprite_entry;
+    next_sprite_forced = 1;	/* the first sprite recorded on the line always opens a new entry (see record_sprite) */
+
+    /* memset(sprite_last_drawn_at, 0, sizeof sprite_last_drawn_at); */
+    last_sprite_point = 0;
+    fetch_state = fetch_not_started;
+    passed_plfstop = 0;
+
+    /* Clear the per-plane shifter, fetch latch and output word state. */
+    memset (todisplay, 0, sizeof todisplay);
+    memset (fetched, 0, sizeof fetched);
+    memset (fetched_aga0, 0, sizeof fetched_aga0);
+    memset (fetched_aga1, 0, sizeof fetched_aga1);
+    memset (outword, 0, sizeof outword);
+
+    last_decide_line_hpos = -1;
+    last_diw_pix_hpos = -1;
+    last_ddf_pix_hpos = -1;
+    last_sprite_hpos = -1;
+    last_fetch_hpos = -1;
+}
+/* Set vsynctime to syncbase / vblank_hz, reduced to nine tenths of that when currprefs.produce_sound is above 1. */
+void compute_vsynctime (void)
+{
+    vsynctime = syncbase / vblank_hz;
+    if (currprefs.produce_sound <= 1)
+	return;
+    vsynctime = vsynctime * 9 / 10;
+}
+
+/* Copy new_beamcon0 into beamcon0, load the PAL timing variables if its
+   0x20 bit is set and the NTSC ones otherwise, then log the choice.  */
+static void init_hz (void)
+{
+    const char *mode;
+
+    beamcon0 = new_beamcon0;
+    if (beamcon0 & 0x20) {
+	mode = "PAL";
+	maxvpos = MAXVPOS_PAL;
+	maxhpos = MAXHPOS_PAL;
+	minfirstline = MINFIRSTLINE_PAL;
+	vblank_endline = VBLANK_ENDLINE_PAL;
+	vblank_hz = VBLANK_HZ_PAL;
+    } else {
+	mode = "NTSC";
+	maxvpos = MAXVPOS_NTSC;
+	maxhpos = MAXHPOS_NTSC;
+	minfirstline = MINFIRSTLINE_NTSC;
+	vblank_endline = VBLANK_ENDLINE_NTSC;
+	vblank_hz = VBLANK_HZ_NTSC;
+    }
+    compute_vsynctime ();
+
+    write_log ("Using %s timing\n", mode);
+}
+/* Recompute diwfirstword/diwlastword, plffirstline/plflastline and plfstrt/plfstop from diwstrt, diwstop, diwhigh, ddfstrt and ddfstop. */
+static void calcdiw (void)
+{
+    int hstrt = diwstrt & 0xFF;
+    int hstop = diwstop & 0xFF;
+    int vstrt = diwstrt >> 8;
+    int vstop = diwstop >> 8;
+
+    if (diwhigh_written) {
+	/* Extra high-order bits come from diwhigh. */
+	hstrt |= ((diwhigh >> 5) & 1) << 8;
+	hstop |= ((diwhigh >> 13) & 1) << 8;
+	vstrt |= (diwhigh & 7) << 8;
+	vstop |= ((diwhigh >> 8) & 7) << 8;
+    } else {
+	/* Without diwhigh: hstop gets bit 8 set, vstop gets it when its bit 7 is clear. */
+	hstop += 0x100;
+	if ((vstop & 0x80) == 0)
+	    vstop |= 0x100;
+    }
+
+    diwfirstword = coord_diw_to_window_x (hstrt);
+    diwlastword = coord_diw_to_window_x (hstop);
+    if (diwfirstword < 0)
+	diwfirstword = 0;
+
+    plffirstline = vstrt;
+    plflastline = vstop;
+
+#if 0
+    /* This happens far too often. */
+    if (plffirstline < minfirstline) {
+	write_log ("Warning: Playfield begins before line %d!\n", minfirstline);
+	plffirstline = minfirstline;
+    }
+#endif
+
+#if 0 /* Turrican does this */
+    if (plflastline > 313) {
+	write_log ("Warning: Playfield out of range!\n");
+	plflastline = 313;
+    }
+#endif
+    plfstrt = ddfstrt;
+    plfstop = ddfstop;
+    if (plfstrt < 0x18)	/* fetch never starts before 0x18 */
+	plfstrt = 0x18;
+}
+
+/* Mousehack stuff */
+
+#define defstepx (1<<16)	/* default step scale; products with it are shifted right by 16 in do_mouse_hack */
+#define defstepy (1<<16)
+#define defxoffs 0
+#define defyoffs 0
+
+static const int docal = 60, xcaloff = 40, ycaloff = 20;	/* thresholds and margins for follow-mode step calibration */
+static const int calweight = 3;	/* weight of the running average used when recalibrating mstepx/mstepy */
+static int lastsampledmx, lastsampledmy;
+static int lastspr0x,lastspr0y,lastdiffx,lastdiffy,spr0pos,spr0ctl;
+static int mstepx,mstepy,xoffs=defxoffs,yoffs=defyoffs;
+static int sprvbfl;	/* set to 2 by mousehack_handle, counted down in follow mode */
+
+int lastmx, lastmy;
+int newmousecounters;
+int ievent_alive = 0;	/* while > 0, do_mouse_hack zeroes mouse_x/mouse_y and does nothing else */
+
+static enum { unknown_mouse, normal_mouse, dont_care_mouse, follow_mouse } mousestate;
+/* Switch to "don't care" mouse mode; does nothing if that mode is already active. */
+static void mousehack_setdontcare (void)
+{
+    if (mousestate != dont_care_mouse) {
+	write_log ("Don't care mouse mode set\n");
+	mousestate = dont_care_mouse;
+	/* Restart from the current lastmx/lastmy and the default step scale. */
+	lastspr0x = lastmx; lastspr0y = lastmy;
+	mstepx = defstepx; mstepy = defstepy;
+    }
+}
+/* Switch to "follow sprite" mouse mode; does nothing if that mode is already active. */
+static void mousehack_setfollow (void)
+{
+    if (mousestate != follow_mouse) {
+	write_log ("Follow sprite mode set\n");
+	mousestate = follow_mouse;
+	/* Forget the previous deltas and latched sprite words; reset the step scale. */
+	lastdiffx = lastdiffy = 0;
+	sprvbfl = 0;
+	spr0ctl = spr0pos = 0;
+	mstepx = defstepx; mstepy = defstepy;
+    }
+}
+/* Helper selected by emulated D0: 0 returns -1 if ievent_alive, else needmousehack (); 1 returns the mouse X position and sets ievent_alive to 10; 2 returns Y; others return 0. */
+static uae_u32 mousehack_helper (void)
+{
+    int mousexpos, mouseypos;
+
+#ifdef PICASSO96
+    if (picasso_on) {
+	picasso_clip_mouse (&lastmx, &lastmy);
+	mousexpos = lastmx;
+	mouseypos = lastmy;
+    } else
+#endif
+    {
+	/* @@@ This isn't completely right, it doesn't deal with virtual
+	   screen sizes larger than physical very well.  */
+	/* Clamp lastmx/lastmy to the gfxvidinfo dimensions before converting. */
+	if (lastmy >= gfxvidinfo.height)
+	    lastmy = gfxvidinfo.height - 1;
+	if (lastmy < 0)
+	    lastmy = 0;
+	if (lastmx < 0)
+	    lastmx = 0;
+	if (lastmx >= gfxvidinfo.width)
+	    lastmx = gfxvidinfo.width - 1;
+	mouseypos = coord_native_to_amiga_y (lastmy) << 1;
+	mousexpos = coord_native_to_amiga_x (lastmx);
+    }
+
+    switch (m68k_dreg (regs, 0)) {
+    case 0:
+	return ievent_alive ? -1 : needmousehack ();
+    case 1:
+	ievent_alive = 10;
+	return mousexpos;
+    case 2:
+	return mouseypos;
+    }
+    return 0;
+}
+/* Toggle between "don't care" and "follow sprite" mouse modes. */
+void togglemouse (void)
+{
+    if (mousestate == dont_care_mouse)
+	mousehack_setfollow ();
+    else if (mousestate == follow_mouse)
+	mousehack_setdontcare ();
+    /* Any other state is left untouched. */
+}
+/* Clamp VAL to the range -127 .. 127. */
+STATIC_INLINE int adjust (int val)
+{
+    if (val < -127)
+	val = -127;
+    if (val > 127)
+	val = 127;
+    return val;
+}
+/* Update mouse_x/mouse_y according to mousestate; while ievent_alive > 0 both are set to 0 instead.  Aborts on an unexpected mousestate. */
+static void do_mouse_hack (void)
+{
+    int spr0x = ((spr0pos & 0xff) << 2) | ((spr0ctl & 1) << 1);
+    int spr0y = ((spr0pos >> 8) | ((spr0ctl & 4) << 6)) << 1;
+    int diffx, diffy;
+
+    if (ievent_alive > 0) {
+	mouse_x = mouse_y = 0;
+	return;
+    }
+    switch (mousestate) {
+    case normal_mouse:
+	/* Add the movement since the last sample, limited to +/-127 per axis. */
+	diffx = lastmx - lastsampledmx;
+	diffy = lastmy - lastsampledmy;
+	if (!newmousecounters) {
+	    if (diffx > 127) diffx = 127;
+	    if (diffx < -127) diffx = -127;
+	    mouse_x += diffx;
+	    if (diffy > 127) diffy = 127;
+	    if (diffy < -127) diffy = -127;
+	    mouse_y += diffy;
+	}
+	lastsampledmx += diffx; lastsampledmy += diffy;
+	break;
+
+    case dont_care_mouse:
+	/* Scale the movement by mstepx/mstepy (>> 16) and clamp with adjust (). */
+	diffx = adjust (((lastmx - lastspr0x) * mstepx) >> 16);
+	diffy = adjust (((lastmy - lastspr0y) * mstepy) >> 16);
+	lastspr0x = lastmx; lastspr0y = lastmy;
+	mouse_x += diffx; mouse_y += diffy;
+	break;
+
+    case follow_mouse:
+	if (sprvbfl && sprvbfl-- > 1) {	/* runs only while sprvbfl was above 1; decrements it whenever nonzero */
+	    int mousexpos, mouseypos;
+
+	    /* Recalibrate mstepx from the last delta if it was large and the sprite moved inside the margins. */
+	    if ((lastdiffx > docal || lastdiffx < -docal)
+		&& lastspr0x != spr0x
+		&& spr0x > plfstrt*4 + 34 + xcaloff
+		&& spr0x < plfstop*4 - xcaloff)
+	    {
+		int val = (lastdiffx << 16) / (spr0x - lastspr0x);
+		if (val >= 0x8000)
+		    mstepx = (mstepx * (calweight - 1) + val) / calweight;
+	    }
+	    if ((lastdiffy > docal || lastdiffy < -docal)
+		&& lastspr0y != spr0y
+		&& spr0y > plffirstline + ycaloff
+		&& spr0y < plflastline - ycaloff)
+	    {
+		int val = (lastdiffy << 16) / (spr0y - lastspr0y);
+		if (val >= 0x8000)
+		    mstepy = (mstepy * (calweight - 1) + val) / calweight;
+	    }
+	    if (lastmy >= gfxvidinfo.height)
+		lastmy = gfxvidinfo.height-1;
+	    mouseypos = coord_native_to_amiga_y (lastmy) << 1;
+	    mousexpos = coord_native_to_amiga_x (lastmx);
+	    diffx = adjust ((((mousexpos + xoffs - spr0x) & ~1) * mstepx) >> 16);
+	    diffy = adjust ((((mouseypos + yoffs - spr0y) & ~1) * mstepy) >> 16);
+	    lastspr0x = spr0x; lastspr0y = spr0y;
+	    lastdiffx = diffx; lastdiffy = diffy;
+	    mouse_x += diffx; mouse_y += diffy;
+	}
+	break;
+	
+    default:
+	abort ();
+    }
+}
+
+static void mousehack_handle (unsigned int ctl, unsigned int pos)
+{
+    /* Latch ctl/pos only when no sample is pending and X exceeds 2 * DISPLAY_LEFT_SHIFT.  */
+    if (sprvbfl || ((pos & 0xff) << 2) <= 2 * DISPLAY_LEFT_SHIFT)
+	return;
+    spr0ctl = ctl, spr0pos = pos;
+    sprvbfl = 2;
+}
+
+static int timehack_alive = 0;
+/* D0 == 0 queries timehack_alive; otherwise the host time is stored at the address in A0.  */
+static uae_u32 timehack_helper (void)
+{
+#ifdef HAVE_GETTIMEOFDAY
+    struct timeval tv;
+    if (m68k_dreg (regs, 0) == 0)
+	return timehack_alive;
+
+    timehack_alive = 10;
+    /* Seconds are rebased by 8 years (incl. 2 leap days) minus 2 hours; microseconds go to A0 + 4.  */
+    gettimeofday (&tv, NULL);
+    put_long (m68k_areg (regs, 0), tv.tv_sec - (((365 * 8 + 2) * 24 - 2) * 60 * 60));
+    put_long (m68k_areg (regs, 0) + 4, tv.tv_usec);
+    return 0;
+#else
+    return 2;	/* built without gettimeofday support */
+#endif
+}
+
+ /*
+  * register functions
+  */
+STATIC_INLINE uae_u16 DENISEID (void)
+{
+    /* ID word: the AGA flag is tested first, then ECS Denise; any other
+       chipset reads back as 0xFFFF.  */
+    return (currprefs.chipset_mask & CSMASK_AGA) ? 0xF8
+	: (currprefs.chipset_mask & CSMASK_ECS_DENISE) ? 0xFC
+	: 0xFFFF;
+}
+STATIC_INLINE uae_u16 DMACONR (void)
+{
+    /* Bit 14 is set unless bltstate is BLT_done; bit 13 mirrors blt_info.blitzero.  */
+    return dmacon | (bltstate != BLT_done ? 0x4000 : 0) | (blt_info.blitzero ? 0x2000 : 0);
+}
+/* Read-back accessors for the interrupt and ADKCON words.  Each returns
+   the stored value unmodified.  */
+STATIC_INLINE uae_u16 INTENAR (void) { return intena; }
+
+/* Not static, so other files may call it.  The commented-out expression
+   would merge a serial flag into bit 0; it stays disabled.  */
+uae_u16 INTREQR (void)
+{
+    return intreq /* | (currprefs.use_serial ? 0x0001 : 0) */;
+}
+
+STATIC_INLINE uae_u16 ADKCONR (void) { return adkcon; }
+STATIC_INLINE uae_u16 VPOSR (void)
+{   /* vpos from bit 8 upwards, the lof flag, and the NTSC/chipset identification bits.  */
+    unsigned int idbits = (currprefs.ntscmode ? 0x1000 : 0)
+	| ((currprefs.chipset_mask & CSMASK_AGA) ? 0x2300 : 0)
+	| ((currprefs.chipset_mask & CSMASK_ECS_AGNUS) ? 0x2000 : 0);
+    return (vpos >> 8) | lof | idbits;
+}
+static void VPOSW (uae_u16 v)
+{
+    /* Only bit 15 (the lof flag) is taken from the written value.  This
+     * register is much more fun on a real Amiga -- you can program
+     * refresh rates with it ;) -- but that is not emulated here.  */
+    uae_u16 new_lof = v & 0x8000;
+    if (new_lof != lof)
+	lof_changed = 1;
+    lof = new_lof;
+}
+
+/* VHPOSR: vpos shifted into the high byte, OR-ed with the current
+   horizontal position.  The uae_u16 return type drops whatever part of
+   vpos << 8 does not fit.  */
+STATIC_INLINE uae_u16 VHPOSR (void) { return (vpos << 8) | current_hpos (); }
+
+STATIC_INLINE void COP1LCH (uae_u16 v) { cop1lc &= 0xffff; cop1lc |= (uae_u32)v << 16; }	/* replace high word */
+STATIC_INLINE void COP1LCL (uae_u16 v) { cop1lc &= ~0xffff; cop1lc |= v & 0xfffe; }	/* replace low word, bit 0 cleared */
+STATIC_INLINE void COP2LCH (uae_u16 v) { cop2lc &= 0xffff; cop2lc |= (uae_u32)v << 16; }	/* replace high word */
+STATIC_INLINE void COP2LCL (uae_u16 v) { cop2lc &= ~0xffff; cop2lc |= v & 0xfffe; }	/* replace low word, bit 0 cleared */
+
+static void start_copper (void)
+{
+    int was_active = eventtab[ev_copper].active;
+    eventtab[ev_copper].active = 0;	/* deactivate the copper event entry */
+    if (was_active)
+	events_schedule ();
+    /* Restart instruction fetch at the current beam position (hpos forced even).  */
+    cop_state.ignore_next = 0;
+    cop_state.state = COP_read1;
+    cop_state.vpos = vpos;
+    cop_state.hpos = current_hpos () & ~1;
+    /* Per-cycle copper processing is only armed while dmaen (DMA_COPPER) holds.  */
+    if (dmaen (DMA_COPPER)) {
+	copper_enabled_thisline = 1;
+	set_special (SPCFLAG_COPPER);
+    }
+}
+
+static void COPJMP1 (uae_u16 a)
+{
+    cop_state.ip = cop1lc;	/* strobe: the written value `a' is not used */
+    start_copper ();
+}
+
+static void COPJMP2 (uae_u16 a)
+{
+    cop_state.ip = cop2lc;	/* strobe: the written value `a' is not used */
+    start_copper ();
+}
+
+STATIC_INLINE void COPCON (uae_u16 a)
+{
+    copcon = a;	/* stored verbatim; bit 1 is consulted by perform_copper_write () */
+}
+
+static void DMACON (int hpos, uae_u16 v)
+{
+    int i;
+    /* Apply a write to DMACON and propagate the change to copper, blitter and audio state.  */
+    uae_u16 oldcon = dmacon;
+    /* NOTE(review): decide_* presumably commit display state up to hpos before the bits change -- confirm.  */
+    decide_line (hpos);
+    decide_fetch (hpos);
+
+    setclr (&dmacon, v);
+    dmacon &= 0x1FFF;	/* only bits 0-12 are stored */
+
+    /* FIXME? Maybe we need to think a bit more about the master DMA enable
+     * bit in these cases. */
+    if ((dmacon & DMA_COPPER) != (oldcon & DMA_COPPER)) {
+	eventtab[ev_copper].active = 0;
+    }
+    if ((dmacon & DMA_COPPER) > (oldcon & DMA_COPPER)) {	/* copper bit went 0 -> 1: restart from cop1lc */
+	cop_state.ip = cop1lc;
+	cop_state.ignore_next = 0;
+	cop_state.state = COP_read1;
+	cop_state.vpos = vpos;
+	cop_state.hpos = hpos & ~1;
+	copper_enabled_thisline = 1;
+	set_special (SPCFLAG_COPPER);
+    }
+    if (! (dmacon & DMA_COPPER)) {	/* copper bit clear: stop the copper */
+	copper_enabled_thisline = 0;
+	unset_special (SPCFLAG_COPPER);
+	cop_state.state = COP_stop;
+    }
+
+    if ((dmacon & DMA_BLITPRI) > (oldcon & DMA_BLITPRI) && bltstate != BLT_done) {
+	static int count = 0;	/* makes the warning below print only once */
+	if (!count) {
+	    count = 1;
+	    write_log ("warning: program is doing blitpri hacks.\n");
+	}
+	set_special (SPCFLAG_BLTNASTY);
+    }
+    if ((dmacon & (DMA_BLITPRI | DMA_BLITTER | DMA_MASTER)) != (DMA_BLITPRI | DMA_BLITTER | DMA_MASTER))
+	unset_special (SPCFLAG_BLTNASTY);
+
+    if (currprefs.produce_sound > 0) {
+	update_audio ();
+
+	for (i = 0; i < 4; i++) {
+	    struct audio_channel_data *cdp = audio_channel + i;
+	    int chan_ena = (dmacon & 0x200) && (dmacon & (1<<i));	/* bit 9 gates all four channel bits */
+	    if (cdp->dmaen == chan_ena)
+		continue;
+	    cdp->dmaen = chan_ena;
+	    if (cdp->dmaen)
+		audio_channel_enable_dma (cdp);
+	    else
+		audio_channel_disable_dma (cdp);
+	}
+	schedule_audio ();
+    }
+    events_schedule();
+}
+
+/*static int trace_intena = 0;*/
+
+STATIC_INLINE void INTENA (uae_u16 v)
+{
+/*    if (trace_intena)
+	write_log ("INTENA: %04x\n", v);*/
+    setclr (&intena,v);	/* apply the set/clear write to intena */
+    /* There's stupid code out there that does
+	[some INTREQ bits at level 3 are set]
+	clear all INTREQ bits
+	Enable one INTREQ level 3 bit
+	Set level 3 handler
+
+	If we set SPCFLAG_INT for the clear, then by the time the enable happens,
+	we'll have SPCFLAG_DOINT set, and the interrupt happens immediately, but
+	it needs to happen one insn later, when the new L3 handler has been
+	installed.  */
+    if (v & 0x8000)	/* only writes with bit 15 set raise SPCFLAG_INT */
+	set_special (SPCFLAG_INT);
+}
+
+void INTREQ_0 (uae_u16 v)
+{
+    setclr (&intreq,v);
+    set_special (SPCFLAG_INT);	/* unlike INTENA, flagged on every write */
+}
+
+void INTREQ (uae_u16 v)
+{
+    INTREQ_0 (v);
+    if ((v & 0x8800) == 0x0800)	/* bit 15 clear and bit 11 set in the written value */
+	serdat &= 0xbfff;	/* clears bit 14 of serdat */
+    rethink_cias ();
+}
+
+static void update_adkmasks (void)
+{
+    /* Channel i gets mask 0 when adkcon bit i or bit i + 4 is set, and
+       an all-ones mask otherwise.  */
+    unsigned long t = adkcon | (adkcon >> 4);
+    int ch;
+
+    for (ch = 0; ch < 4; ch++)
+	audio_channel[ch].adk_mask = (((t >> ch) & 1) - 1);
+}
+
+static void ADKCON (uae_u16 v)
+{
+    if (currprefs.produce_sound > 0)
+	update_audio ();	/* runs before adkcon and the masks change */
+
+    setclr (&adkcon,v);
+    update_adkmasks ();
+}
+
+static void BEAMCON0 (uae_u16 v)
+{
+    if (currprefs.chipset_mask & CSMASK_ECS_AGNUS)	/* ignored without ECS Agnus */
+	new_beamcon0 = v & 0x20;	/* only bit 5 is kept */
+}
+
+static void BPLPTH (int hpos, uae_u16 v, int num)
+{
+    decide_line (hpos);
+    decide_fetch (hpos);
+    bplpt[num] = (bplpt[num] & 0xffff) | ((uae_u32)v << 16);	/* replace the high word */
+}
+static void BPLPTL (int hpos, uae_u16 v, int num)
+{
+    decide_line (hpos);
+    decide_fetch (hpos);
+    bplpt[num] = (bplpt[num] & ~0xffff) | (v & 0xfffe);	/* replace the low word, bit 0 cleared */
+}
+
+static void BPLCON0 (int hpos, uae_u16 v)
+{
+    if (! (currprefs.chipset_mask & CSMASK_ECS_DENISE))
+	v &= ~0x00F1;	/* bits masked off without ECS Denise */
+    else if (! (currprefs.chipset_mask & CSMASK_AGA))
+	v &= ~0x00B1;	/* bits masked off on ECS Denise without AGA */
+
+    if (bplcon0 == v)	/* effective value unchanged: nothing to do */
+	return;
+    decide_line (hpos);
+    decide_fetch (hpos);
+
+    /* HAM change?  */
+    if ((bplcon0 ^ v) & 0x800) {
+	record_color_change (hpos, -1, !! (v & 0x800));
+    }
+    
+    bplcon0 = v;
+    curr_diagram = cycle_diagram_table[fetchmode][GET_RES(bplcon0)][GET_PLANES (v)];	/* v == bplcon0 here */
+
+    if (currprefs.chipset_mask & CSMASK_AGA) {
+	decide_sprites (hpos);
+	expand_sprres ();
+    }
+
+    expand_fmodes ();
+}
+
+STATIC_INLINE void BPLCON1 (int hpos, uae_u16 v)
+{
+    if (bplcon1 == v)	/* unchanged value: skip the decide_* calls */
+	return;
+    decide_line (hpos);
+    decide_fetch (hpos);
+    bplcon1 = v;
+}
+
+STATIC_INLINE void BPLCON2 (int hpos, uae_u16 v)
+{
+    if (bplcon2 == v)	/* unchanged value: nothing to do */
+	return;
+    decide_line (hpos);
+    bplcon2 = v;
+}
+
+STATIC_INLINE void BPLCON3 (int hpos, uae_u16 v)
+{
+    if (! (currprefs.chipset_mask & CSMASK_AGA))	/* writes are ignored without AGA */
+	return;
+    if (bplcon3 == v)
+	return;
+    decide_line (hpos);
+    decide_sprites (hpos);
+    bplcon3 = v;
+    expand_sprres ();
+}
+
+STATIC_INLINE void BPLCON4 (int hpos, uae_u16 v)
+{
+    if (! (currprefs.chipset_mask & CSMASK_AGA))	/* writes are ignored without AGA */
+	return;
+    if (bplcon4 == v)
+	return;
+    decide_line (hpos);
+    bplcon4 = v;
+}
+
+static void BPL1MOD (int hpos, uae_u16 v)
+{
+    v &= ~1;	/* bit 0 is dropped */
+    if ((uae_s16)bpl1mod == (uae_s16)v)	/* compared as signed 16-bit values */
+	return;
+    decide_line (hpos);
+    decide_fetch (hpos);
+    bpl1mod = v;
+}
+
+static void BPL2MOD (int hpos, uae_u16 v)
+{
+    v &= ~1;	/* bit 0 is dropped */
+    if ((uae_s16)bpl2mod == (uae_s16)v)	/* compared as signed 16-bit values */
+	return;
+    decide_line (hpos);
+    decide_fetch (hpos);
+    bpl2mod = v;
+}
+
+STATIC_INLINE void BPL1DAT (int hpos, uae_u16 v)
+{
+    decide_line (hpos);
+    bpl1dat = v;
+    /* Of the eight data registers, only this one has a side effect beyond the store.  */
+    maybe_first_bpl1dat (hpos);
+}
+/* We could do as well without those... */
+STATIC_INLINE void BPL2DAT (uae_u16 v) { bpl2dat = v; }
+STATIC_INLINE void BPL3DAT (uae_u16 v) { bpl3dat = v; }
+STATIC_INLINE void BPL4DAT (uae_u16 v) { bpl4dat = v; }
+STATIC_INLINE void BPL5DAT (uae_u16 v) { bpl5dat = v; }
+STATIC_INLINE void BPL6DAT (uae_u16 v) { bpl6dat = v; }
+STATIC_INLINE void BPL7DAT (uae_u16 v) { bpl7dat = v; }
+STATIC_INLINE void BPL8DAT (uae_u16 v) { bpl8dat = v; }
+
+static void DIWSTRT (int hpos, uae_u16 v)
+{
+    if (diwstrt == v && ! diwhigh_written)	/* a pending DIWHIGH write forces reprocessing */
+	return;
+    decide_line (hpos);
+    diwhigh_written = 0;	/* writing DIWSTRT clears the DIWHIGH-written flag */
+    diwstrt = v;
+    calcdiw ();
+}
+
+static void DIWSTOP (int hpos, uae_u16 v)
+{
+    if (diwstop == v && ! diwhigh_written)	/* a pending DIWHIGH write forces reprocessing */
+	return;
+    decide_line (hpos);
+    diwhigh_written = 0;	/* writing DIWSTOP clears the DIWHIGH-written flag */
+    diwstop = v;
+    calcdiw ();
+}
+
+static void DIWHIGH (int hpos, uae_u16 v)
+{
+    if (! (currprefs.chipset_mask & CSMASK_ECS_DENISE))	/* ignored without ECS Denise */
+	return;
+    if (diwhigh_written && diwhigh == v)
+	return;
+    decide_line (hpos);
+    diwhigh_written = 1;
+    diwhigh = v;
+    calcdiw ();
+}
+
+static void DDFSTRT (int hpos, uae_u16 v)
+{
+    v &= 0xFC;	/* only bits 2-7 are kept */
+    if (ddfstrt == v)
+	return;
+    decide_line (hpos);
+    ddfstrt = v;
+    calcdiw ();
+    if (ddfstop > 0xD4 && (ddfstrt & 4) == 4) {
+	static int last_warned;
+	last_warned = (last_warned + 1) & 4095;	/* counter wraps every 4096 hits */
+	if (last_warned == 0)	/* so the warning is logged once per 4096 occurrences */
+	    write_log ("WARNING! Very strange DDF values.\n");
+    }
+}
+
+static void DDFSTOP (int hpos, uae_u16 v)
+{
+    /* ??? "Virtual Meltdown" sets this to 0xD2 and expects it to behave
+       differently from 0xD0.  RSI Megademo sets it to 0xd1 and expects it
+       to behave like 0xd0.  Some people also write the high 8 bits and
+       expect them to be ignored.  So mask it with 0xFE.  */
+    v &= 0xFE;
+    if (ddfstop == v)
+	return;
+    decide_line (hpos);
+    decide_fetch (hpos);
+    ddfstop = v;
+    calcdiw ();
+    if (fetch_state != fetch_not_started)
+	estimate_last_fetch_cycle (hpos);
+    if (ddfstop > 0xD4 && (ddfstrt & 4) == 4) {
+	static int last_warned;
+	/* Log once per 4096 occurrences, exactly as DDFSTRT does.  */
+	last_warned = (last_warned + 1) & 4095;
+	if (last_warned == 0)
+	    write_log ("WARNING! Very strange DDF values.\n");
+    }
+}
+
+static void FMODE (uae_u16 v)
+{
+    if (! (currprefs.chipset_mask & CSMASK_AGA))
+	v = 0;	/* without AGA the register always behaves as zero */
+    /* The low two bits select the fetch mode; resolution and plane count come from bplcon0.  */
+    fmode = v;
+    sprite_width = GET_SPRITEWIDTH (fmode);
+    switch (fmode & 3) {
+    case 0:
+	fetchmode = 0;
+	break;
+    case 1:
+    case 2:
+	fetchmode = 1;
+	break;
+    case 3:
+	fetchmode = 2;
+	break;
+    }
+    curr_diagram = cycle_diagram_table[fetchmode][GET_RES (bplcon0)][GET_PLANES (bplcon0)];	/* same indexing as BPLCON0 () */
+    expand_fmodes ();
+}
+
+static void BLTADAT (uae_u16 v)
+{
+    maybe_blit (0);
+
+    blt_info.bltadat = v;	/* stored unshifted; see the comment below */
+}
+/*
+ * "Loading data shifts it immediately" says the HRM. Well, that may
+ * be true for BLTBDAT, but not for BLTADAT - it appears the A data must be
+ * loaded for every word so that AFWM and ALWM can be applied.
+ */
+static void BLTBDAT (uae_u16 v)
+{
+    maybe_blit (0);
+
+    if (bltcon1 & 2)	/* bit 1 of bltcon1 selects a left shift instead of a right shift */
+	blt_info.bltbhold = v << (bltcon1 >> 12);
+    else
+	blt_info.bltbhold = v >> (bltcon1 >> 12);
+    blt_info.bltbdat = v;	/* the unshifted value is kept as well */
+}
+static void BLTCDAT (uae_u16 v) { maybe_blit (0); blt_info.bltcdat = v; }
+
+static void BLTAMOD (uae_u16 v) { maybe_blit (1); blt_info.bltamod = (uae_s16)(v & 0xFFFE); }	/* signed, bit 0 cleared */
+static void BLTBMOD (uae_u16 v) { maybe_blit (1); blt_info.bltbmod = (uae_s16)(v & 0xFFFE); }
+static void BLTCMOD (uae_u16 v) { maybe_blit (1); blt_info.bltcmod = (uae_s16)(v & 0xFFFE); }
+static void BLTDMOD (uae_u16 v) { maybe_blit (1); blt_info.bltdmod = (uae_s16)(v & 0xFFFE); }
+
+static void BLTCON0 (uae_u16 v) { maybe_blit (0); bltcon0 = v; blinea_shift = v >> 12; }	/* top nibble is cached in blinea_shift */
+/* The next category is "Most useless hardware register".
+ * And the winner is... */
+static void BLTCON0L (uae_u16 v)
+{
+    if (! (currprefs.chipset_mask & CSMASK_ECS_AGNUS))	/* ignored without ECS Agnus */
+	return;
+    maybe_blit (0); bltcon0 = (bltcon0 & 0xFF00) | (v & 0xFF);	/* replaces only the low byte of bltcon0 */
+}
+static void BLTCON1 (uae_u16 v) { maybe_blit (0); bltcon1 = v; }
+
+static void BLTAFWM (uae_u16 v) { maybe_blit (0); blt_info.bltafwm = v; }
+static void BLTALWM (uae_u16 v) { maybe_blit (0); blt_info.bltalwm = v; }
+
+static void BLTAPTH (uae_u16 v) { maybe_blit (0); bltapt &= 0xffff; bltapt |= (uae_u32)v << 16; }	/* high word */
+static void BLTAPTL (uae_u16 v) { maybe_blit (0); bltapt &= ~0xffff; bltapt |= v & 0xFFFE; }	/* low word, bit 0 cleared */
+static void BLTBPTH (uae_u16 v) { maybe_blit (0); bltbpt &= 0xffff; bltbpt |= (uae_u32)v << 16; }
+static void BLTBPTL (uae_u16 v) { maybe_blit (0); bltbpt &= ~0xffff; bltbpt |= v & 0xFFFE; }
+static void BLTCPTH (uae_u16 v) { maybe_blit (0); bltcpt &= 0xffff; bltcpt |= (uae_u32)v << 16; }
+static void BLTCPTL (uae_u16 v) { maybe_blit (0); bltcpt &= ~0xffff; bltcpt |= v & 0xFFFE; }
+static void BLTDPTH (uae_u16 v) { maybe_blit (0); bltdpt &= 0xffff; bltdpt |= (uae_u32)v << 16; }
+static void BLTDPTL (uae_u16 v) { maybe_blit (0); bltdpt &= ~0xffff; bltdpt |= v & 0xFFFE; }
+
+static void BLTSIZE (uae_u16 v)
+{
+    unsigned int vsize = v >> 6, hsize = v & 0x3F;
+
+    maybe_blit (0);
+    /* A zero field selects the maximum: 1024 vertical, 64 horizontal.  */
+    blt_info.vblitsize = vsize ? vsize : 1024;
+    blt_info.hblitsize = hsize ? hsize : 64;
+
+    bltstate = BLT_init;
+    do_blitter ();
+}
+
+static void BLTSIZV (uae_u16 v)
+{
+    if (! (currprefs.chipset_mask & CSMASK_ECS_AGNUS))	/* ignored without ECS Agnus */
+	return;
+    maybe_blit (0);
+    oldvblts = v & 0x7FFF;	/* remembered until BLTSIZH is written */
+}
+
+static void BLTSIZH (uae_u16 v)
+{
+    if (! (currprefs.chipset_mask & CSMASK_ECS_AGNUS))	/* ignored without ECS Agnus */
+	return;
+    maybe_blit (0);
+    blt_info.hblitsize = v & 0x7FF;
+    blt_info.vblitsize = oldvblts;
+    if (!blt_info.vblitsize) blt_info.vblitsize = 32768;	/* zero selects the maximum */
+    if (!blt_info.hblitsize) blt_info.hblitsize = 0x800;	/* zero selects the maximum */
+    bltstate = BLT_init;
+    do_blitter ();
+}
+
+STATIC_INLINE void SPRxCTL_1 (uae_u16 v, int num, int hpos)
+{
+    int sprxp;
+    struct sprite *s = &spr[num];
+    sprctl[num] = v;
+    nr_armed -= s->armed;	/* a control write disarms the sprite */
+    s->armed = 0;
+    sprxp = (sprpos[num] & 0xFF) * 2 + (v & 1);	/* X: POS low byte doubled, plus CTL bit 0 */
+
+    /* Quite a bit salad in this register... */
+    if (currprefs.chipset_mask & CSMASK_AGA) {
+	/* We ignore the SHRES 35ns increment for now; SHRES support doesn't
+	   work anyway, so we may as well restrict AGA sprites to a 70ns
+	   resolution.  */
+	sprxp <<= 1;
+	sprxp |= (v >> 4) & 1;
+    }
+    s->xpos = sprxp;
+    s->vstart = (sprpos[num] >> 8) | ((sprctl[num] << 6) & 0x100);	/* bit 8 comes from CTL bit 2 */
+    s->vstop = (sprctl[num] >> 8) | ((sprctl[num] << 7) & 0x100);	/* bit 8 comes from CTL bit 1 */
+    if (vpos == s->vstart)
+	s->state = SPR_waiting_stop;
+#ifdef SPRITE_DEBUG
+    write_log ("%d:%d:SPR%dCTL V=%04.4X STATE=%d ARMED=%d\n", vpos, hpos, num, v, s->state, s->armed);
+#endif
+}
+STATIC_INLINE void SPRxPOS_1 (uae_u16 v, int num, int hpos)
+{
+    int sprxp;
+    struct sprite *s = &spr[num];
+    sprpos[num] = v;
+    sprxp = (v & 0xFF) * 2 + (sprctl[num] & 1);	/* same X formula as SPRxCTL_1 */
+
+    if (currprefs.chipset_mask & CSMASK_AGA) {	/* AGA: one extra low-order X bit from CTL bit 4 */
+	sprxp <<= 1;
+	sprxp |= (sprctl[num] >> 4) & 1;
+    }
+    s->xpos = sprxp;
+    s->vstart = (sprpos[num] >> 8) | ((sprctl[num] << 6) & 0x100);	/* vstop and the armed state are left alone */
+#ifdef SPRITE_DEBUG
+    write_log ("%d:%d:SPR%dPOS %04.4X STATE=%d ARMED=%d\n", vpos, hpos, num, v, s->state, s->armed);
+#endif
+}
+STATIC_INLINE void SPRxDATA_1 (uae_u16 v, int num)
+{
+    sprdata[num][0] = v;
+    nr_armed += 1 - spr[num].armed;	/* counts the sprite only if it was not armed already */
+    spr[num].armed = 1;
+}
+STATIC_INLINE void SPRxDATB_1 (uae_u16 v, int num)
+{
+    sprdatb[num][0] = v;	/* unlike DATA, writing DATB does not arm the sprite */
+}
+static void SPRxDATA (int hpos, uae_u16 v, int num) { decide_sprites (hpos); SPRxDATA_1 (v, num); }	/* the wrappers call decide_sprites first */
+static void SPRxDATB (int hpos, uae_u16 v, int num) { decide_sprites (hpos); SPRxDATB_1 (v, num); }
+static void SPRxCTL (int hpos, uae_u16 v, int num) { decide_sprites (hpos); SPRxCTL_1 (v, num, hpos); }
+static void SPRxPOS (int hpos, uae_u16 v, int num) { decide_sprites (hpos); SPRxPOS_1 (v, num, hpos); }
+static void SPRxPTH (int hpos, uae_u16 v, int num)
+{
+    decide_sprites (hpos);
+    spr[num].pt &= 0xffff;	/* keep the low word */
+    spr[num].pt |= (uae_u32)v << 16;	/* install v as the high word */
+#ifdef SPRITE_DEBUG
+    write_log ("%d:%d:SPR%dPTH %08.8X\n", vpos, hpos, num, spr[num].pt);
+#endif
+}
+static void SPRxPTL (int hpos, uae_u16 v, int num)
+{   /* Replace the low 16 bits of spr[num].pt, after decide_sprites has run for hpos.  */
+    decide_sprites (hpos);
+    spr[num].pt &= ~0xffff;
+    spr[num].pt |= v & 0xfffe;	/* bit 0 cleared, as in COPxLCL, BPLPTL and BLTxPTL */
+#ifdef SPRITE_DEBUG
+    write_log ("%d:%d:SPR%dPTL %08.8X\n", vpos, hpos, num, spr[num].pt);
+#endif
+}
+
+static void CLXCON (uae_u16 v)
+{
+    /* v bits 15..12 map to mask bits 7, 5, 3 and 1; mask bits 6, 4, 2 and 0 are always set.  */
+    clx_sprmask = 0x55 | ((v >> 8) & 0x80) | ((v >> 9) & 0x20) | ((v >> 10) & 0x08) | ((v >> 11) & 0x02);
+    clxcon_bpl_match = v & 63;
+    clxcon_bpl_enable = (v >> 6) & 63;
+    clxcon = v;
+}
+static void CLXCON2 (uae_u16 v)
+{
+    if (!(currprefs.chipset_mask & CSMASK_AGA))	/* ignored without AGA */
+	return;
+    clxcon2 = v;
+    clxcon_bpl_enable |= v & (0x40|0x80);	/* adds enable bits 6 and 7 */
+    clxcon_bpl_match |= (v & (0x01|0x02)) << 6;	/* adds match bits 6 and 7 from v bits 0 and 1 */
+ }
+static uae_u16 CLXDAT (void)
+{
+    uae_u16 v = clxdat;
+    clxdat = 0;	/* reading clears the stored value */
+    return v;
+}
+
+static uae_u16 COLOR_READ (int num)
+{
+    int entry, red, green, blue;
+
+    /* Read-back needs both AGA and bit 8 of bplcon2; otherwise 0xffff.  */
+    if ((currprefs.chipset_mask & CSMASK_AGA) == 0 || (bplcon2 & 0x0100) == 0)
+	return 0xffff;
+
+    entry = ((bplcon3 >> 13) & 7) * 32 + num;
+    red = current_colors.color_regs_aga[entry] >> 16;
+    green = (current_colors.color_regs_aga[entry] >> 8) & 0xFF;
+    blue = current_colors.color_regs_aga[entry] & 0xFF;
+
+    /* bplcon3 bit 9 selects the low nibbles, otherwise the high ones.  */
+    if (bplcon3 & 0x200)
+	return ((red & 15) << 8) | ((green & 15) << 4) | ((blue & 15) << 0);
+    return ((red >> 4) << 8) | ((green >> 4) << 4) | ((blue >> 4) << 0);
+}
+
+static void COLOR_WRITE (int hpos, uae_u16 v, int num)
+{
+    v &= 0xFFF;	/* 12 bits: one nibble each for r, g and b */
+    if (currprefs.chipset_mask & CSMASK_AGA) {
+	int r,g,b;
+	int cr,cg,cb;
+	int colreg;
+	uae_u32 cval;
+
+	/* writing is disabled when RDRAM=1 */
+	if (bplcon2 & 0x0100)
+	    return;
+
+	colreg = ((bplcon3 >> 13) & 7) * 32 + num;	/* bplcon3 bits 13-15 select one of eight 32-entry banks */
+	r = (v & 0xF00) >> 8;
+	g = (v & 0xF0) >> 4;
+	b = (v & 0xF) >> 0;
+	cr = current_colors.color_regs_aga[colreg] >> 16;
+	cg = (current_colors.color_regs_aga[colreg] >> 8) & 0xFF;
+	cb = current_colors.color_regs_aga[colreg] & 0xFF;
+	/* bplcon3 bit 9 set: v supplies the low nibbles; otherwise each nibble fills both halves.  */
+	if (bplcon3 & 0x200) {
+	    cr &= 0xF0; cr |= r;
+	    cg &= 0xF0; cg |= g;
+	    cb &= 0xF0; cb |= b;
+	} else {
+	    cr = r + (r << 4);
+	    cg = g + (g << 4);
+	    cb = b + (b << 4);
+	}
+	cval = (cr << 16) | (cg << 8) | cb;
+	if (cval == current_colors.color_regs_aga[colreg])	/* no change: skip the bookkeeping */
+	    return;
+
+	/* Call this with the old table still intact. */
+	record_color_change (hpos, colreg, cval);
+	remembered_color_entry = -1;
+	current_colors.color_regs_aga[colreg] = cval;
+	current_colors.acolors[colreg] = CONVERT_RGB (cval);
+   } else {
+	if (current_colors.color_regs_ecs[num] == v)	/* no change: skip the bookkeeping */
+	    return;
+	/* Call this with the old table still intact. */
+	record_color_change (hpos, num, v);
+	remembered_color_entry = -1;
+	current_colors.color_regs_ecs[num] = v;
+	current_colors.acolors[num] = xcolors[v];
+    }
+}
+
+static uae_u16 potgo_value;
+
+static void POTGO (uae_u16 v)
+{
+    potgo_value = v;	/* stored verbatim; POTGOR derives its result from it */
+}
+
+static uae_u16 POTGOR (void)
+{
+    uae_u16 v = (potgo_value | (potgo_value >> 1)) & 0x5500;
+
+    v |= (~potgo_value & 0xAA00) >> 1;
+    /* Port 0: pressed buttons clear bit 10 (0xFBFF) and bit 8 (0xFEFF).  */
+    if (JSEM_ISMOUSE (0, &currprefs)) {
+	if (buttonstate[2])
+	    v &= 0xFBFF;
+
+	if (buttonstate[1])
+	    v &= 0xFEFF;
+    } else if (JSEM_ISJOY0 (0, &currprefs) || JSEM_ISJOY1 (0, &currprefs)) {
+	if (joy0button & 2) v &= 0xfbff;
+	if (joy0button & 4) v &= 0xfeff;
+    }
+    /* Port 1: pressed buttons clear bit 14 (0xBFFF) and bit 12 (0xEFFF).  */
+    if (JSEM_ISJOY0 (1, &currprefs) || JSEM_ISJOY1 (1, &currprefs)) {
+	if (joy1button & 2) v &= 0xbfff;
+	if (joy1button & 4) v &= 0xefff;
+    }
+
+    return v;
+}
+
+static uae_u16 POT0DAT (void)
+{
+    static uae_u16 cnt = 0;	/* persists across calls; only changes while a button is held */
+    if (JSEM_ISMOUSE (0, &currprefs)) {
+	if (buttonstate[2])
+	    cnt = ((cnt + 1) & 0xFF) | (cnt & 0xFF00);	/* bump the low byte, wrapping within 8 bits */
+	if (buttonstate[1])
+	    cnt += 0x100;	/* bump the high byte */
+    }
+
+    return cnt;
+}
+static uae_u16 JOY0DAT (void)
+{
+    if (JSEM_ISMOUSE (0, &currprefs)) {
+	do_mouse_hack ();	/* refresh mouse_x/mouse_y before sampling them */
+	return ((uae_u8)mouse_x) + ((uae_u16)mouse_y << 8);	/* X in the low byte, Y in the high byte */
+    }
+    return joy0dir;
+}
+static uae_u16 JOY1DAT (void)
+{
+    if (JSEM_ISMOUSE (1, &currprefs)) {
+	do_mouse_hack ();	/* refresh mouse_x/mouse_y before sampling them */
+	return ((uae_u8)mouse_x) + ((uae_u16)mouse_y << 8);	/* X in the low byte, Y in the high byte */
+    }
+    return joy1dir;
+}
+static void JOYTEST (uae_u16 v)
+{
+    if (JSEM_ISMOUSE (0, &currprefs)) {
+	mouse_x = v & 0xFC;	/* the two low bits of each counter are zeroed */
+	mouse_y = (v >> 8) & 0xFC;
+    }
+}
+
+/* The copper code.  The biggest nightmare in the whole emulator.
+
+   Alright.  The current theory:
+   1. Copper moves happen 2 cycles after state READ2 is reached.
+      It can't happen immediately when we reach READ2, because the
+      data needs time to get back from the bus.  An additional 2
+      cycles are needed for non-Agnus registers, to take into account
+      the delay for moving data from chip to chip.
+   2. As stated in the HRM, a WAIT really does need an extra cycle
+      to wake up.  This is implemented by _not_ falling through from
+      a successful wait to READ1, but by starting the next cycle.
+      (Note: the extra cycle for the WAIT apparently really needs a
+      free cycle; i.e. contention with the bitplane fetch can slow
+      it down).
+   3. Apparently, to compensate for the extra wake up cycle, a WAIT
+      will use the _incremented_ horizontal position, so the WAIT
+      cycle normally finishes two clocks earlier than the position
+      it was waiting for.  The extra cycle then takes us to the
+      position that was waited for.
+      If the earlier cycle is busy with a bitplane, things change a bit.
+      E.g., waiting for position 0x50 in a 6 plane display: In cycle
+      0x4e, we fetch BPL5, so the wait wakes up in 0x50, the extra cycle
+      takes us to 0x54 (since 0x52 is busy), then we have READ1/READ2,
+      and the next register write is at 0x5c.
+   4. The last cycle in a line is not usable for the copper.
+   5. A 4 cycle delay also applies to the WAIT instruction.  This means
+      that the second of two back-to-back WAITs (or a WAIT whose
+      condition is immediately true) takes 8 cycles.
+   6. This also applies to a SKIP instruction.  The copper does not
+      fetch the next instruction while waiting for the second word of
+      a WAIT or a SKIP to arrive.
+   7. A SKIP also seems to need an unexplained additional two cycles
+      after its second word arrives; this is _not_ a memory cycle (I
+      think, the documentation is pretty clear on this).
+   8. Two additional cycles are inserted when writing to COPJMP1/2.  */
+
+/* Determine which cycles are available for the copper in a display
+ * with a given number of planes.  */
+
+STATIC_INLINE int copper_cant_read (int hpos)
+{
+    int t;
+    /* Returns non-zero when the copper cannot use the cycle at hpos.  */
+    if (hpos + 1 >= maxhpos)	/* the last cycle of a line is never usable */
+	return 1;
+    /* No fetch started yet, or hpos lies before plfleft: the cycle is free.  */
+    if (fetch_state == fetch_not_started || hpos < thisline_decision.plfleft)
+	return 0;
+    /* Past plfright (with passed_plfstop == 3) or past the estimated last fetch cycle: free as well.  */
+    if ((passed_plfstop == 3 && hpos >= thisline_decision.plfright)
+	|| hpos >= estimated_last_fetch_cycle)
+	return 0;
+    /* Otherwise the current cycle diagram entry for this position decides.  */
+    t = curr_diagram[(hpos + cycle_diagram_shift) & fetchstart_mask];
+#if 0
+    if (t == -1)
+	abort ();
+#endif
+    return t;
+}
+
+STATIC_INLINE int dangerous_reg (int reg)
+{
+    /* Returns 1 for a dangerous register, 0 for a safe one.
+     * Registers in [0xE0, 0x1C0) count as safe:
+     *   bitplane pointers, control registers, modulos and data;
+     *   sprite pointers, control registers, and data;
+     *   color registers.
+     * Everything outside that window is reported as dangerous.  */
+    return reg < 0xE0 || reg >= 0x1C0;
+}
+
+#define FAST_COPPER 1
+
+/* The future, Conan?
+   We try to look ahead in the copper list to avoid doing continuous calls
+   to update_copper (which is what happens when SPCFLAG_COPPER is set).  If
+   we find that the same effect can be achieved by setting a delayed event
+   and then doing multiple copper insns in one batch, we can get a massive
+   speedup.
+
+   We don't try to be precise here.  All copper reads take exactly 2 cycles,
+   the effect of bitplane contention is ignored.  Trying to get it exactly
+   right would be much more complex and as such carry a huge risk of getting
+   it subtly wrong; and it would also be more expensive - we want this code
+   to be fast.  */
+static void predict_copper (void)
+{
+    uaecptr ip = cop_state.ip;
+    unsigned int c_hpos = cop_state.hpos;
+    enum copper_states state = cop_state.state;
+    unsigned int w1, w2, cycle_count;
+    /* Simulate the copper ahead on local copies; if at least 8 cycles can be skipped, use one delayed event.  */
+    switch (state) {
+    case COP_read1_wr_in2:
+    case COP_read2_wr_in2:
+    case COP_read1_wr_in4:
+	if (dangerous_reg (cop_state.saved_i1))	/* pending write to a dangerous register: no prediction */
+	    return;
+	state = state == COP_read2_wr_in2 ? COP_read2 : COP_read1;
+	break;
+
+    case COP_read1_in2:
+	c_hpos += 2;
+	state = COP_read1;
+	break;
+
+    case COP_stop:
+    case COP_bltwait:
+    case COP_wait1:
+    case COP_skip_in4:
+    case COP_skip_in2:
+	return;	/* these states are not predicted */
+
+    case COP_wait_in4:
+	c_hpos += 2;
+	/* fallthrough */
+    case COP_wait_in2:
+	c_hpos += 2;
+	/* fallthrough */
+    case COP_wait:
+	state = COP_wait;
+	break;
+
+    default:
+	break;
+    }
+    /* Only needed for COP_wait, but let's shut up the compiler.  */
+    w1 = cop_state.saved_i1;
+    w2 = cop_state.saved_i2;
+    cop_state.first_sync = c_hpos;
+    cop_state.regtypes_modified = REGTYPE_FORCE;
+
+    /* Get this case out of the way, so that the loop below only has to deal
+       with read1 and wait.  */
+    if (state == COP_read2) {
+	w1 = cop_state.i1;
+	if (w1 & 1) {
+	    w2 = chipmem_wget (ip);
+	    if (w2 & 1)	/* both low bits set: a SKIP, which is not predicted */
+		goto done;
+	    state = COP_wait;
+	    c_hpos += 4;
+	} else if (dangerous_reg (w1)) {
+	    c_hpos += 4;
+	    goto done;
+	} else {
+	    cop_state.regtypes_modified |= regtypes[w1 & 0x1FE];
+	    state = COP_read1;
+	    c_hpos += 2;
+	}
+	ip += 2;	
+    }
+    /* Walk read1/wait states until the end of the line or an instruction that cannot be predicted.  */
+    while (c_hpos + 1 < maxhpos) {
+	if (state == COP_read1) {
+	    w1 = chipmem_wget (ip);
+	    if (w1 & 1) {
+		w2 = chipmem_wget (ip + 2);
+		if (w2 & 1)	/* SKIP: stop predicting */
+		    break;
+		state = COP_wait;
+		c_hpos += 6;
+	    } else if (dangerous_reg (w1)) {
+		c_hpos += 6;
+		goto done;
+	    } else {
+		cop_state.regtypes_modified |= regtypes[w1 & 0x1FE];
+		c_hpos += 4;
+	    }
+	    ip += 4;
+	} else if (state == COP_wait) {
+	    if ((w2 & 0xFE) != 0xFE)	/* horizontal compare mask not fully enabled: stop predicting */
+		break;
+	    else {
+		unsigned int vcmp = (w1 & (w2 | 0x8000)) >> 8;
+		unsigned int hcmp = (w1 & 0xFE);
+
+		unsigned int vp = vpos & (((w2 >> 8) & 0x7F) | 0x80);
+		if (vp < vcmp) {
+		    /* Whee.  We can wait until the end of the line!  */
+		    c_hpos = maxhpos;
+		} else if (vp > vcmp || hcmp <= c_hpos) {
+		    state = COP_read1;
+		    /* minimum wakeup time */
+		    c_hpos += 2;
+		} else {
+		    state = COP_read1;
+		    c_hpos = hcmp;
+		}
+		/* If this is the current instruction, remember that we don't
+		   need to sync CPU and copper anytime soon.  */
+		if (cop_state.ip == ip) {
+		    cop_state.first_sync = c_hpos;
+		}
+	    }
+	} else
+	    abort ();
+    }
+
+  done:
+    cycle_count = c_hpos - cop_state.hpos;
+    if (cycle_count >= 8) {	/* worth it: replace per-cycle polling by one delayed event */
+	unset_special (SPCFLAG_COPPER);
+	eventtab[ev_copper].active = 1;
+	eventtab[ev_copper].oldcycles = get_cycles ();
+	eventtab[ev_copper].evtime = get_cycles () + cycle_count * CYCLE_UNIT;
+	events_schedule ();
+    }
+}
+
+static void perform_copper_write (int old_hpos)
+{
+    /* Execute the copper MOVE latched in cop_state.saved_i1 (register) and saved_i2 (value).  */
+    unsigned int address = cop_state.saved_i1 & 0x1FE;
+
+    record_copper (cop_state.saved_ip - 4, old_hpos, vpos);
+
+    if (address < (copcon & 2 ? ((currprefs.chipset_mask & CSMASK_AGA) ? 0 : 0x40u) : 0x80u)) {	/* below the limit selected by copcon bit 1: stop the copper */
+	cop_state.state = COP_stop;	
+	copper_enabled_thisline = 0;
+	unset_special (SPCFLAG_COPPER);
+	return;
+    }
+
+    if (address == 0x88) {	/* jump to the first list; the copper state is set directly */
+	cop_state.ip = cop1lc;
+	cop_state.state = COP_read1_in2;
+    } else if (address == 0x8A) {	/* jump to the second list */
+	cop_state.ip = cop2lc;
+	cop_state.state = COP_read1_in2;
+    } else
+	custom_wput_1 (old_hpos, address, cop_state.saved_i2);
+}
+
+static int isagnus[]= {	/* indexed by (register offset >> 1); 1 selects COP_read1_wr_in2, 0 COP_read1_wr_in4 */
+    1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
+    1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* BPLxPT */
+    0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* SPRxPT */
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* colors */
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+};
+
+static void update_copper (int until_hpos)
+{
+    int vp = vpos & (((cop_state.saved_i2 >> 8) & 0x7F) | 0x80);    
+    int c_hpos = cop_state.hpos;
+
+    if (eventtab[ev_copper].active)
+	abort ();
+
+    if (cop_state.state == COP_wait && vp < cop_state.vcmp)
+	abort ();
+
+    until_hpos &= ~1;
+
+    if (until_hpos > (maxhpos & ~1))
+	until_hpos = maxhpos & ~1;
+
+    until_hpos += 2;
+    for (;;) {
+	int old_hpos = c_hpos;
+	int hp;
+
+	if (c_hpos >= until_hpos)
+	    break;
+
+	/* So we know about the fetch state.  */
+	decide_line (c_hpos);
+
+	switch (cop_state.state) {
+	case COP_read1_in2:
+	    cop_state.state = COP_read1;
+	    break;
+	case COP_read1_wr_in2:
+	    cop_state.state = COP_read1;
+	    perform_copper_write (old_hpos);
+	    /* That could have turned off the copper.  */
+	    if (! copper_enabled_thisline)
+		goto out;
+
+	    break;
+	case COP_read1_wr_in4:
+	    cop_state.state = COP_read1_wr_in2;
+	    break;
+	case COP_read2_wr_in2:
+	    cop_state.state = COP_read2;
+	    perform_copper_write (old_hpos);
+	    /* That could have turned off the copper.  */
+	    if (! copper_enabled_thisline)
+		goto out;
+
+	    break;
+	case COP_wait_in2:
+	    cop_state.state = COP_wait1;
+	    break;
+	case COP_wait_in4:
+	    cop_state.state = COP_wait_in2;
+	    break;
+	case COP_skip_in2:
+	{
+	    static int skipped_before;
+	    unsigned int vcmp, hcmp, vp1, hp1;
+	    cop_state.state = COP_read1_in2;
+
+	    vcmp = (cop_state.saved_i1 & (cop_state.saved_i2 | 0x8000)) >> 8;
+	    hcmp = (cop_state.saved_i1 & cop_state.saved_i2 & 0xFE);
+
+	    if (! skipped_before) {
+		skipped_before = 1;
+		write_log ("Program uses Copper SKIP instruction.\n");
+	    }
+
+	    vp1 = vpos & (((cop_state.saved_i2 >> 8) & 0x7F) | 0x80);
+	    hp1 = old_hpos & (cop_state.saved_i2 & 0xFE);
+
+	    if ((vp1 > vcmp || (vp1 == vcmp && hp1 >= hcmp))
+		&& ((cop_state.saved_i2 & 0x8000) != 0 || ! (DMACONR() & 0x4000)))
+		cop_state.ignore_next = 1;
+	    break;
+	}
+	case COP_skip_in4:
+	    cop_state.state = COP_skip_in2;
+	    break;
+	default:
+	    break;
+	}
+
+	c_hpos += 2;
+	if (copper_cant_read (old_hpos))
+	    continue;
+
+	switch (cop_state.state) {
+	case COP_read1_wr_in4:
+	    abort ();
+
+	case COP_read1_wr_in2:
+	case COP_read1:
+	    cop_state.i1 = chipmem_wget (cop_state.ip);
+	    cop_state.ip += 2;
+	    cop_state.state = cop_state.state == COP_read1 ? COP_read2 : COP_read2_wr_in2;
+	    break;
+
+	case COP_read2_wr_in2:
+	    abort ();
+
+	case COP_read2:
+	    cop_state.i2 = chipmem_wget (cop_state.ip);
+	    cop_state.ip += 2;
+	    if (cop_state.ignore_next) {
+		cop_state.ignore_next = 0;
+		cop_state.state = COP_read1;
+		break;
+	    }
+
+	    cop_state.saved_i1 = cop_state.i1;
+	    cop_state.saved_i2 = cop_state.i2;
+	    cop_state.saved_ip = cop_state.ip;
+
+	    if (cop_state.i1 & 1) {
+		if (cop_state.i2 & 1)
+		    cop_state.state = COP_skip_in4;
+		else
+		    cop_state.state = COP_wait_in4;
+	    } else {
+		unsigned int reg = cop_state.i1 & 0x1FE;
+		cop_state.state = isagnus[reg >> 1] ? COP_read1_wr_in2 : COP_read1_wr_in4;
+	    }
+	    break;
+
+	case COP_wait1:
+	    /* There's a nasty case here.  As stated in the "Theory" comment above, we
+	       test against the incremented copper position.  I believe this means that
+	       we have to increment the _vertical_ position at the last cycle in the line,
+	       and set the horizontal position to 0.
+	       Normally, this isn't going to make a difference, since we consider these
+	       last cycles unavailable for the copper, so waking up in the last cycle has
+	       the same effect as waking up at the start of the line.  However, there is
+	       one possible problem:  If we're at 0xFFE0, any wait for an earlier position
+	       must _not_ complete (since, in effect, the current position will be back
+	       at 0/0).  This can be seen in the Superfrog copper list.
+	       Things get monstrously complicated if we try to handle this "properly" by
+	       incrementing vpos and setting c_hpos to 0.  Especially the various speedup
+	       hacks really assume that vpos remains constant during one line.  Hence,
+	       this hack: defer the entire decision until the next line if necessary.  */
+	    if (c_hpos >= (maxhpos & ~1))
+		break;
+	    cop_state.state = COP_wait;
+
+	    cop_state.vcmp = (cop_state.saved_i1 & (cop_state.saved_i2 | 0x8000)) >> 8;
+	    cop_state.hcmp = (cop_state.saved_i1 & cop_state.saved_i2 & 0xFE);
+
+	    vp = vpos & (((cop_state.saved_i2 >> 8) & 0x7F) | 0x80);
+
+	    if (cop_state.saved_i1 == 0xFFFF && cop_state.saved_i2 == 0xFFFE) {
+		cop_state.state = COP_stop;
+		copper_enabled_thisline = 0;
+		unset_special (SPCFLAG_COPPER);
+		goto out;
+	    }
+	    if (vp < cop_state.vcmp) {
+		copper_enabled_thisline = 0;
+		unset_special (SPCFLAG_COPPER);
+		goto out;
+	    }
+
+	    /* fall through */
+	do_wait:
+	case COP_wait:
+	    if (vp < cop_state.vcmp)
+		abort ();
+
+	    hp = c_hpos & (cop_state.saved_i2 & 0xFE);
+	    if (vp == cop_state.vcmp && hp < cop_state.hcmp) {
+		/* Position not reached yet.  */
+		if (currprefs.fast_copper && (cop_state.saved_i2 & 0xFE) == 0xFE) {
+		    int wait_finish = cop_state.hcmp - 2;
+		    /* This will leave c_hpos untouched if it's equal to wait_finish.  */
+		    if (wait_finish < c_hpos)
+			abort ();
+		    else if (wait_finish <= until_hpos) {
+			c_hpos = wait_finish;
+		    } else
+			c_hpos = until_hpos;
+		}	      
+		break;
+	    }
+
+	    /* Now we know that the comparisons were successful.  We might still
+	       have to wait for the blitter though.  */
+	    if ((cop_state.saved_i2 & 0x8000) == 0 && (DMACONR() & 0x4000)) {
+		/* We need to wait for the blitter.  */
+		cop_state.state = COP_bltwait;
+		copper_enabled_thisline = 0;
+		unset_special (SPCFLAG_COPPER);
+		goto out;
+	    }
+
+	    record_copper (cop_state.ip - 4, old_hpos, vpos);
+
+	    cop_state.state = COP_read1;
+	    break;
+
+	default:
+	    break;
+	}
+    }
+
+  out:
+    cop_state.hpos = c_hpos;
+
+    /* The test against maxhpos also prevents us from calling predict_copper
+       when we are being called from hsync_handler, which would not only be
+       stupid, but actively harmful.  */
+    if (currprefs.fast_copper && (regs.spcflags & SPCFLAG_COPPER) && c_hpos + 8 < maxhpos)
+	predict_copper ();
+}
+
+/* Work out whether the copper can do anything on the current line and set
+   copper_enabled_thisline and SPCFLAG_COPPER to match.  */
+static void compute_spcflag_copper (void)
+{
+    /* Start from "copper idle"; the checks below may turn it back on.  */
+    copper_enabled_thisline = 0;
+    unset_special (SPCFLAG_COPPER);
+
+    if (! dmaen (DMA_COPPER))
+	return;
+    if (cop_state.state == COP_stop || cop_state.state == COP_bltwait)
+	return;
+
+    if (cop_state.state == COP_wait) {
+	/* Same masking of vpos as the wait code in update_copper uses.  */
+	int masked_vpos = vpos & (((cop_state.saved_i2 >> 8) & 0x7F) | 0x80);
+
+	/* The wait cannot be satisfied on this line.  */
+	if (masked_vpos < cop_state.vcmp)
+	    return;
+    }
+
+    copper_enabled_thisline = 1;
+    if (currprefs.fast_copper)
+	predict_copper ();
+
+    /* Only raise the flag when no copper event is currently active.  */
+    if (eventtab[ev_copper].active == 0)
+	set_special (SPCFLAG_COPPER);
+}
+
+/* Handler installed for ev_copper (see init_eventtab): flags the copper for
+   processing and deactivates the event.  */
+static void copper_handler (void)
+{
+    /* This will take effect immediately, within the same cycle.  */
+    set_special (SPCFLAG_COPPER);
+
+    /* The event must never fire on a line where the copper is disabled.  */
+    if (copper_enabled_thisline == 0)
+	abort ();
+
+    eventtab[ev_copper].active = 0;
+}
+
+/* Presumably called by the blitter code once a blit has finished (confirm
+   against the caller).  If the copper is parked in COP_bltwait, turn it back
+   into an ordinary wait at the current beam position and recompute whether
+   it is runnable on this line.  Does nothing in any other copper state.  */
+void blitter_done_notify (void)
+{
+    if (cop_state.state == COP_bltwait) {
+	/* Copper positions are kept even, hence the "& ~1".  */
+	cop_state.hpos = current_hpos () & ~1;
+	cop_state.vpos = vpos;
+	cop_state.state = COP_wait;
+	compute_spcflag_copper ();
+    }
+}
+
+/* Let the copper catch up with the current horizontal position.  */
+void do_copper (void)
+{
+    update_copper (current_hpos ());
+}
+
+/* Bring the copper up to date before a custom register access.
+   HPOS is the position to advance to, DO_SCHEDULE is nonzero if
+   events_schedule must be called after cancelling the copper event, and
+   ADDR is the address that is going to be read/written; this access is
+   the reason why we want to update the copper.  This function is also
+   used from hsync_handler to finish up the line; for this case, we check
+   hpos against maxhpos.  */
+STATIC_INLINE void sync_copper_with_cpu (int hpos, int do_schedule, unsigned int addr)
+{
+    if (eventtab[ev_copper].active) {
+	/* Unless we are finishing the line, there might be reasons why we
+	   don't actually need to bother updating the copper: either we are
+	   still before cop_state.first_sync, or the register type being
+	   accessed is not among cop_state.regtypes_modified.  */
+	if (hpos != maxhpos
+	    && (hpos < cop_state.first_sync
+		|| (cop_state.regtypes_modified & regtypes[addr & 0x1FE]) == 0))
+	    return;
+
+	/* Cancel the pending copper event and run the copper via the
+	   special flag instead.  */
+	eventtab[ev_copper].active = 0;
+	if (do_schedule)
+	    events_schedule ();
+	set_special (SPCFLAG_COPPER);
+    }
+
+    /* Need to let the copper advance to the current position.  */
+    if (copper_enabled_thisline)
+	update_copper (hpos);
+}
+
+/* Return the next data word for sprite S and advance s->pt by 2.
+   With DMA nonzero the word is read from chip memory at s->pt and is also
+   stored in last_custom_value; otherwise the current last_custom_value is
+   returned and chip memory is not touched.  */
+STATIC_INLINE uae_u16 sprite_fetch (struct sprite *s, int dma)
+{
+    uae_u16 word;
+
+    if (dma)
+	last_custom_value = chipmem_wget (s->pt);
+    word = last_custom_value;
+
+    s->pt += 2;
+    return word;
+}
+
+/* Process one sprite DMA slot.
+   NUM is the sprite index into spr[], CYCLE is 0 for the first slot of the
+   sprite on this line and nonzero for the second, HPOS is passed through to
+   the SPRxPOS_1/SPRxCTL_1 helpers.
+   Depending on the sprite's state the slot fetches either the position /
+   control words or the DATA / DATB words.  */
+STATIC_INLINE void do_sprites_1 (int num, int cycle, int hpos)
+{
+    struct sprite *s = &spr[num];
+    int dma;
+
+    /* State transitions are evaluated once per line, in the first slot.
+       If vstart == vstop the second test wins and the state is SPR_restart.  */
+    if (cycle == 0) {
+	if (vpos == s->vstart)
+	    s->state = SPR_waiting_stop;
+	if (vpos == s->vstop)
+	    s->state = SPR_restart;
+    }
+    if (!dmaen (DMA_SPRITE))
+	return;
+    /* DMA is nonzero when the fetch may really read chip memory; it is zero
+       only if hpos >= ddfstrt while diwstate is DIW_waiting_stop and
+       bitplane DMA is enabled.  */
+    dma = hpos < ddfstrt || diwstate != DIW_waiting_stop || !dmaen (DMA_BITPLANE);
+    if (s->state == SPR_restart || vpos == sprite_vblank_endline) {
+	/* Fetch a position (slot 0) or control (slot 1) word.  */
+	uae_u16 data = sprite_fetch (s, dma);
+	/* sprite_fetch already advanced pt by 2; add the remainder of
+	   sprite_width >> 3 bytes.  */
+	s->pt += (sprite_width >> 3) - 2;
+#ifdef SPRITE_DEBUG
+	write_log ("dma:");
+#endif
+	if (cycle == 0) {
+	    SPRxPOS_1 (dma ? data : sprpos[num], num, hpos);
+	} else {
+	    s->state = SPR_waiting_start;
+	    SPRxCTL_1 (dma ? data : sprctl[num], num, hpos);
+	}
+    } else if (s->state == SPR_waiting_stop) {
+	/* Fetch the first DATA (slot 0) or DATB (slot 1) word.  */
+	uae_u16 data = sprite_fetch (s, dma);
+	/* Hack for X mouse auto-calibration */
+	if (num == 0 && cycle == 0)
+	    mousehack_handle (sprctl[0], sprpos[0]);
+
+	if (cycle == 0)
+	    SPRxDATA_1 (dma ? data : sprdata[num][0], num);
+	else
+	    SPRxDATB_1 (dma ? data : sprdatb[num][0], num);
+	/* Wider sprites fetch additional words; the fetches always happen
+	   (so pt advances), but the results are stored only with DMA.  */
+	switch (sprite_width)
+	    {
+	    case 64:
+	    {
+		uae_u32 data32 = sprite_fetch (s, dma);
+		uae_u32 data641 = sprite_fetch (s, dma);
+		uae_u32 data642 = sprite_fetch (s, dma);
+		if (dma) {
+		    if (cycle == 0) {
+			sprdata[num][3] = data642;
+			sprdata[num][2] = data641;
+			sprdata[num][1] = data32;
+		    } else {
+			sprdatb[num][3] = data642;
+			sprdatb[num][2] = data641;
+			sprdatb[num][1] = data32;
+		    }
+		}
+	    }
+	    break;
+	    case 32:
+	    {	
+		uae_u32 data32 = sprite_fetch (s, dma);
+		if (dma) {
+		    if (cycle == 0)
+			sprdata[num][1] = data32;
+		    else
+			sprdatb[num][1] = data32;
+		}
+	    }
+	    break;
+	}
+    }
+}
+
+#define SPR0_HPOS 0x15
+/* Run every sprite DMA slot that lies between last_sprite_hpos (inclusive)
+   and HPOS (exclusive).  Slots start at SPR0_HPOS; each sprite owns four
+   consecutive positions, of which offsets 0 and 2 are handled as cycle 0
+   and cycle 1 of do_sprites_1.  */
+static void do_sprites (int hpos)
+{
+    int first, limit;
+    int pos;
+
+    /* I don't know whether this is right. Some programs write the sprite pointers
+     * directly at the start of the copper list. With the test against currvp, the
+     * first two words of data are read on the second line in the frame. The problem
+     * occurs when the program jumps to another copperlist a few lines further down
+     * which _also_ writes the sprite pointer registers. This means that a) writing
+     * to the sprite pointers sets the state to SPR_restart; or b) that sprite DMA
+     * is disabled until the end of the vertical blanking interval. The HRM
+     * isn't clear - it says that the vertical sprite position can be set to any
+     * value, but this wouldn't be the first mistake... */
+    /* Update: I modified one of the programs to write the sprite pointers the
+     * second time only _after_ the VBlank interval, and it showed the same behaviour
+     * as it did unmodified under UAE with the above check. This indicates that the
+     * solution below is correct. */
+    /* Another update: seems like we have to use the NTSC value here (see Sanity Turmoil
+     * demo).  */
+    /* Maximum for Sanity Turmoil: 27.
+       Minimum for Sanity Arte: 22.  */
+    if (vpos < sprite_vblank_endline)
+	return;
+
+    first = last_sprite_hpos;
+    limit = hpos;
+
+    /* Nothing to do if the interval misses the sprite slot window entirely.
+       Note that last_sprite_hpos is left unchanged in that case.  */
+    if (limit < SPR0_HPOS || first >= SPR0_HPOS + MAX_SPRITES * 4)
+	return;
+
+    /* Clamp the interval to the sprite slot window.  */
+    if (first < SPR0_HPOS)
+	first = SPR0_HPOS;
+    if (limit > SPR0_HPOS + MAX_SPRITES * 4)
+	limit = SPR0_HPOS + MAX_SPRITES * 4;
+
+    for (pos = first; pos < limit; pos++) {
+	int slot = pos - SPR0_HPOS;
+	int phase = slot & 3;
+
+	if (phase == 0)
+	    do_sprites_1 (slot / 4, 0, pos);
+	else if (phase == 2)
+	    do_sprites_1 (slot / 4, 1, pos);
+    }
+    last_sprite_hpos = hpos;
+}
+
+/* Put all MAX_SPRITES sprites into SPR_restart and zero the sprpos and
+   sprctl arrays.  */
+static void init_sprites (void)
+{
+    int n = MAX_SPRITES;
+
+    memset (sprctl, 0, sizeof sprctl);
+    memset (sprpos, 0, sizeof sprpos);
+
+    while (n-- > 0)
+	spr[n].state = SPR_restart;
+}
+
+/* Grow the sprite_entries[] and color_changes[] buffers when a nonzero
+   delta_sprite_entry / delta_color_change has been recorded.  Only compiled
+   in with OS_WITHOUT_MEMORY_MANAGEMENT; otherwise this is a no-op.
+   The recorded maximum is raised only if both halves could be reallocated;
+   a half that did grow keeps its new (larger) buffer either way.  */
+static void adjust_array_sizes (void)
+{
+#ifdef OS_WITHOUT_MEMORY_MANAGEMENT
+    if (delta_sprite_entry) {
+	int wanted = max_sprite_entry + 50 + delta_sprite_entry;
+	void *grown0, *grown1;
+
+	delta_sprite_entry = 0;
+	grown0 = realloc (sprite_entries[0], wanted * sizeof (struct sprite_entry));
+	grown1 = realloc (sprite_entries[1], wanted * sizeof (struct sprite_entry));
+	if (grown0)
+	    sprite_entries[0] = grown0;
+	if (grown1)
+	    sprite_entries[1] = grown1;
+	if (grown0 && grown1) {
+	    write_log ("new max_sprite_entry=%d\n",wanted);
+	    max_sprite_entry = wanted;
+	}
+    }
+
+    if (delta_color_change) {
+	int wanted = max_color_change + 200 + delta_color_change;
+	void *grown0, *grown1;
+
+	delta_color_change = 0;
+	grown0 = realloc (color_changes[0], wanted * sizeof (struct color_change));
+	grown1 = realloc (color_changes[1], wanted * sizeof (struct color_change));
+	if (grown0)
+	    color_changes[0] = grown0;
+	if (grown1)
+	    color_changes[1] = grown1;
+	if (grown0 && grown1) {
+	    write_log ("new max_color_change=%d\n",wanted);
+	    max_color_change = wanted;
+	}
+    }
+#endif
+}
+
+/* Reset the per-frame line state: next_lineno, nextline_how, and the
+   diwstate / hdiwstate trackers (both back to DIW_waiting_start).  */
+static void init_hardware_frame (void)
+{
+    hdiwstate = DIW_waiting_start;
+    diwstate = DIW_waiting_start;
+
+    nextline_how = nln_normal;
+    next_lineno = 0;
+}
+
+/* Prepare the per-frame recording structures for a new frame: clear the
+   sprite pixels used by the previous set, reset the "next entry" counters,
+   and swap the prev_ and curr_ pointers between the two change sets.  */
+void init_hardware_for_drawing_frame (void)
+{
+    int old_set, new_set;
+
+    adjust_array_sizes ();
+
+    /* Avoid this code in the first frame after a customreset
+       (which zeroes prev_sprite_entries).  */
+    if (prev_sprite_entries) {
+	int start = prev_sprite_entries[0].first_pixel;
+	int count = prev_sprite_entries[prev_next_sprite_entry].first_pixel - start;
+
+	memset (spixels + start, 0, count * sizeof *spixels);
+	memset (spixstate.bytes + start, 0, count * sizeof *spixstate.bytes);
+    }
+    prev_next_sprite_entry = next_sprite_entry;
+
+    next_sprite_entry = 0;
+    next_color_entry = 0;
+    next_color_change = 0;
+    remembered_color_entry = -1;
+
+    /* The set that was current becomes "prev"; the other one becomes "curr".  */
+    old_set = current_change_set;
+    new_set = old_set ^ 1;
+
+    prev_sprite_entries = sprite_entries[old_set];
+    prev_color_changes = color_changes[old_set];
+    prev_color_tables = color_tables[old_set];
+    prev_drawinfo = line_drawinfo[old_set];
+
+    curr_sprite_entries = sprite_entries[new_set];
+    curr_color_changes = color_changes[new_set];
+    curr_color_tables = color_tables[new_set];
+    curr_drawinfo = line_drawinfo[new_set];
+
+    current_change_set = new_set;
+
+    color_src_match = color_dest_match = -1;
+
+    /* Use both halves of the array in alternating fashion.  */
+    curr_sprite_entries[0].first_pixel = new_set * MAX_SPR_PIXELS;
+    next_sprite_forced = 1;
+}
+
+static void do_savestate(void);
+
+/* Per-frame work.  Called from hsync_handler when vpos wraps back to 0.
+   Paces the emulation against the host clock, polls host events and
+   joysticks, triggers the redraw, restarts the copper at cop1lc and updates
+   the frame-time statistics.  Returns early (before the copper restart) when
+   quit_program is positive.  */
+static void vsync_handler (void)
+{
+    int i;
+    for (i = 0; i < MAX_SPRITES; i++)
+	spr[i].state = SPR_waiting_start;
+
+    n_frames++;
+
+    if (currprefs.m68k_speed == -1) {
+	/* Advance the per-frame deadline by one frame time.  */
+	frame_time_t curr_time = read_processor_time ();
+	vsyncmintime += vsynctime;
+	/* @@@ Mathias? How do you think we should do this? */
+	/* If we are too far behind, or we just did a reset, adjust the
+	 * needed time. */
+	if ((long int)(curr_time - vsyncmintime) > 0 || rpt_did_reset)
+	    vsyncmintime = curr_time + vsynctime;
+	rpt_did_reset = 0;
+    } else {
+#ifdef RPT_WORKS_OK
+	if (RPT_WORKS_OK) {
+	    /* Busy-wait until the processor time reaches vsyncmintime.
+	       NOTE(review): the loop condition re-reads the timer instead of
+	       testing curr_time, so curr_time is the sample taken just
+	       before the final check - confirm this is intended.  */
+	    frame_time_t curr_time;
+	    do
+		curr_time = read_processor_time ();
+	    while ((long int)(read_processor_time () - vsyncmintime) < 0);
+	    vsyncmintime = curr_time + vsynctime;
+	}
+#endif
+    }
+
+    handle_events ();
+
+    getjoystate (0, &joy0dir, &joy0button);
+    getjoystate (1, &joy1dir, &joy1button);
+
+    /* Presumably the vertical-blank interrupt request (set bit 0x0020);
+       verify against INTREQ.  */
+    INTREQ (0x8020);
+    /* With bit 2 of bplcon0 set, toggle lof every frame.  */
+    if (bplcon0 & 4)
+	lof ^= 0x8000;
+
+#ifdef PICASSO96
+    if (picasso_on)
+	picasso_handle_vsync ();
+#endif
+    vsync_handle_redraw (lof, lof_changed);
+
+    if (quit_program > 0)
+	return;
+
+    {
+	/* Poll for disk changes on every fifth call only.  */
+	static int cnt = 0;
+	if (cnt == 0) {
+	    /* resolution_check_change (); */
+	    DISK_check_change ();
+	    cnt = 5;
+	}
+	cnt--;
+    }
+
+    /* Start a new set of copper records.  */
+    curr_cop_set ^= 1;
+    nr_cop_records[curr_cop_set] = 0;
+
+    /* For now, let's only allow this to change at vsync time.  It gets too
+     * hairy otherwise.  */
+    if (beamcon0 != new_beamcon0)
+	init_hz ();
+
+    lof_changed = 0;
+
+    /* Restart the copper at the beginning of list 1 for the new frame.  */
+    cop_state.ip = cop1lc;
+    cop_state.state = COP_read1;
+    cop_state.vpos = 0;
+    cop_state.hpos = 0;
+    cop_state.ignore_next = 0;
+
+    init_hardware_frame ();
+
+#ifdef HAVE_GETTIMEOFDAY
+    {
+	struct timeval tv;
+	unsigned long int newtime;
+
+	/* Milliseconds elapsed since seconds_base (set in customreset).  */
+	gettimeofday (&tv,NULL);
+	newtime = (tv.tv_sec-seconds_base) * 1000 + tv.tv_usec / 1000;
+
+	/* The first frame after a reset (bogusframe) is not measured.  */
+	if (!bogusframe) {
+	    lastframetime = newtime - msecs;
+
+#if 0 /* This doesn't appear to work too well yet... later.  */
+	    if (n_consecutive_skipped > currprefs.sound_pri_cutoff
+		|| lastframetime < currprefs.sound_pri_time)
+	    {
+		n_consecutive_skipped = 0;
+		clear_inhibit_frame (IHF_SOUNDADJUST);
+	    } else {
+		n_consecutive_skipped++;
+		set_inhibit_frame (IHF_SOUNDADJUST);
+		total_skipped++;
+	    }
+#endif
+
+	    frametime += lastframetime;
+	    timeframes++;
+
+	    /* Report the average frame rate to the GUI every 128 frames.  */
+	    if ((timeframes & 127) == 0)
+		gui_fps (1000 * timeframes / frametime);
+	}
+	msecs = newtime;
+	bogusframe = 0;
+    }
+#endif
+    /* Count the "alive" timers down towards zero, one step per frame.  */
+    if (ievent_alive > 0)
+	ievent_alive--;
+    if (timehack_alive > 0)
+	timehack_alive--;
+    CIA_vsync_handler ();
+}
+
+/* Per-line work; installed as the ev_hsync handler in init_eventtab.
+   Finishes the copper and the drawing decisions for the line that just
+   ended, fetches audio data, advances vpos (calling vsync_handler on wrap)
+   and sets up the state for the next line.  */
+static void hsync_handler (void)
+{
+    /* Using 0x8A makes sure that we don't accidentally trip over the
+       modified_regtypes check.  (With hpos == maxhpos,
+       sync_copper_with_cpu skips its early-out tests in any case.)  */
+    sync_copper_with_cpu (maxhpos, 0, 0x8A);
+
+    finish_decisions ();
+    /* Collision checks run only if plfleft is set for this line.  */
+    if (thisline_decision.plfleft != -1) {
+	if (currprefs.collision_level > 1)
+	    do_sprite_collisions ();
+	if (currprefs.collision_level > 2)
+	    do_playfield_collisions ();
+    }
+    hsync_record_line_state (next_lineno, nextline_how, thisline_changed);
+
+    /* Push the hsync event forward by the time elapsed since it was last
+       handled.  */
+    eventtab[ev_hsync].evtime += get_cycles () - eventtab[ev_hsync].oldcycles;
+    eventtab[ev_hsync].oldcycles = get_cycles ();
+    CIA_hsync_handler ();
+
+    if (currprefs.produce_sound > 0) {
+	int nr;
+
+	update_audio ();
+
+	/* Sound data is fetched at the beginning of each line */
+	for (nr = 0; nr < 4; nr++) {
+	    struct audio_channel_data *cdp = audio_channel + nr;
+
+	    /* Fetch the next word only for channels with data_written == 2.  */
+	    if (cdp->data_written == 2) {
+		cdp->data_written = 0;
+		cdp->nextdat = chipmem_wget (cdp->pt);
+		cdp->pt += 2;
+		if (cdp->state == 2 || cdp->state == 3) {
+		    if (cdp->wlen == 1) {
+			/* Last word: reload pointer and length and note
+			   that an interrupt is due (intreq2).  */
+			cdp->pt = cdp->lc;
+			cdp->wlen = cdp->len;
+			cdp->intreq2 = 1;
+		    } else
+			cdp->wlen = (cdp->wlen - 1) & 0xFFFF;
+		}
+	    }
+	}
+    }
+
+    hardware_line_completed (next_lineno);
+
+    /* In theory only an equality test is needed here - but if a program
+       goes haywire with the VPOSW register, it can cause us to miss this,
+       with vpos going into the thousands (and all the nasty consequences
+       this has).  */
+
+    /* A frame has one extra line when lof is nonzero.  */
+    if (++vpos >= (maxvpos + (lof != 0))) {
+	vpos = 0;
+	vsync_handler ();
+    }
+
+    DISK_update ();
+
+    /* True on the last line of the frame, but only when m68k_speed is -1
+       and no reset of the processor-time base is pending.  */
+    is_lastline = vpos + 1 == maxvpos + (lof != 0) && currprefs.m68k_speed == -1 && ! rpt_did_reset;
+
+    if ((bplcon0 & 4) && currprefs.gfx_linedbl)
+	notice_interlace_seen ();
+
+    /* Choose the output line and line mode, but only for frames that are
+       actually drawn (framecnt == 0).  */
+    if (framecnt == 0) {
+	int lineno = vpos;
+	nextline_how = nln_normal;
+	if (currprefs.gfx_linedbl) {
+	    lineno *= 2;
+	    nextline_how = currprefs.gfx_linedbl == 1 ? nln_doubled : nln_nblack;
+	    if (bplcon0 & 4) {
+		/* With bit 2 of bplcon0 set, lof selects which of the two
+		   doubled output lines is used.  */
+		if (!lof) {
+		    lineno++;
+		    nextline_how = nln_lower;
+		} else {
+		    nextline_how = nln_upper;
+		}
+	    }
+	}
+	next_lineno = lineno;
+	reset_decisions ();
+    }
+    if (uae_int_requested) {
+	set_uae_int_flag ();
+	INTREQ (0xA000);
+    }
+    /* See if there's a chance of a copper wait ending this line.  */
+    cop_state.hpos = 0;
+    compute_spcflag_copper ();
+}
+
+/* Fill regtypes[], indexed by even register offset 0x000..0x1FE, with the
+   REGTYPE_* class of each custom register.  sync_copper_with_cpu tests these
+   values against cop_state.regtypes_modified to decide whether a CPU access
+   requires the copper to be brought up to date first.
+   Every entry starts as REGTYPE_ALL; the else-if chain below is order
+   sensitive because some of its ranges overlap.  */
+static void init_regtypes (void)
+{
+    int i;
+    for (i = 0; i < 512; i += 2) {
+	regtypes[i] = REGTYPE_ALL;
+	if ((i >= 0x20 && i < 0x28) || i == 0x08 || i == 0x7E)
+	    regtypes[i] = REGTYPE_DISK;
+	/* Must precede the blitter range, which contains 0x68..0x6E.  */
+	else if (i >= 0x68 && i < 0x70)
+	    regtypes[i] = REGTYPE_NONE;
+	else if (i >= 0x40 && i < 0x78)
+	    regtypes[i] = REGTYPE_BLITTER;
+	/* Within each 16-byte audio channel block, offsets 0x0..0xC are audio
+	   registers; offset 0xE is handled by the next test.  */
+	else if (i >= 0xA0 && i < 0xE0 && (i & 0xF) < 0xE)
+	    regtypes[i] = REGTYPE_AUDIO;
+	else if (i >= 0xA0 && i < 0xE0)
+	    regtypes[i] = REGTYPE_NONE;
+	else if (i >= 0xE0 && i < 0x100)
+	    regtypes[i] = REGTYPE_PLANE;
+	else if (i >= 0x120 && i < 0x180)
+	    regtypes[i] = REGTYPE_SPRITE;
+	else if (i >= 0x180 && i < 0x1C0)
+	    regtypes[i] = REGTYPE_COLOR;
+	/* Individual registers not covered by any range above.  */
+	else switch (i) {
+	case 0x02:
+	    /* DMACONR - setting this to REGTYPE_BLITTER will cause it to
+	       conflict with DMACON (since that is REGTYPE_ALL), and the
+	       blitter registers (for the BBUSY bit), but nothing else,
+	       which is (I think) what we want.  */
+	    regtypes[i] = REGTYPE_BLITTER;
+	    break;
+	case 0x04: case 0x06: case 0x2A: case 0x2C:
+	    regtypes[i] = REGTYPE_POS;
+	    break;
+	case 0x0A: case 0x0C:
+	case 0x12: case 0x14: case 0x16:
+	case 0x36:
+	    regtypes[i] = REGTYPE_JOYPORT;
+	    break;
+	case 0x104:
+	case 0x102:
+	    regtypes[i] = REGTYPE_PLANE;
+	    break;
+	/* These keep REGTYPE_ALL and additionally get REGTYPE_FORCE.  */
+	case 0x88: case 0x8A:
+	case 0x8E: case 0x90: case 0x92: case 0x94:
+	case 0x96:
+	case 0x100:
+	    regtypes[i] |= REGTYPE_FORCE;
+	    break;
+	}
+    }
+}
+
+/* Reset the cycle counter and the event table: every event is made
+   inactive, the handlers are installed, and only the hsync event is armed,
+   one line (maxhpos * CYCLE_UNIT) from now.  */
+void init_eventtab (void)
+{
+    int ev;
+
+    currcycle = 0;
+    for (ev = 0; ev < ev_max; ev++) {
+	eventtab[ev].oldcycles = 0;
+	eventtab[ev].active = 0;
+    }
+
+    /* Events that start out inactive.  */
+    eventtab[ev_cia].handler = CIA_handler;
+
+    eventtab[ev_copper].active = 0;
+    eventtab[ev_copper].handler = copper_handler;
+
+    eventtab[ev_blitter].active = 0;
+    eventtab[ev_blitter].handler = blitter_handler;
+
+    eventtab[ev_disk].active = 0;
+    eventtab[ev_disk].handler = DISK_handler;
+
+    eventtab[ev_audio].active = 0;
+    eventtab[ev_audio].handler = audio_evhandler;
+
+    /* The horizontal sync event is the only one running from the start.  */
+    eventtab[ev_hsync].handler = hsync_handler;
+    eventtab[ev_hsync].evtime = get_cycles () + maxhpos * CYCLE_UNIT;
+    eventtab[ev_hsync].active = 1;
+
+    events_schedule ();
+}
+
+/* Reset the custom chip emulation.
+   When no savestate operation is in progress (savestate_state == 0) the
+   chipset registers, colours and sprite state are cleared first.  The rest
+   (MMU, expansion memory, disk, CIA, copper/blitter state, drawing state)
+   is reset unconditionally.  When restoring a savestate
+   (savestate_state == STATE_RESTORE) the register values loaded earlier are
+   then pushed back through the register write functions so that the
+   derived state is rebuilt.  */
+void customreset (void)
+{
+    int i;
+    int zero = 0;
+#ifdef HAVE_GETTIMEOFDAY
+    struct timeval tv;
+#endif
+
+    if (! savestate_state) {
+	currprefs.chipset_mask = changed_prefs.chipset_mask;
+	/* Clear the palette: 32 entries without AGA, 256 with it.  */
+	if ((currprefs.chipset_mask & CSMASK_AGA) == 0) {
+	    for (i = 0; i < 32; i++) {
+		current_colors.color_regs_ecs[i] = 0;
+		current_colors.acolors[i] = xcolors[0];
+	    }
+	} else {
+	    for (i = 0; i < 256; i++) {
+		current_colors.color_regs_aga[i] = 0;
+		current_colors.acolors[i] = CONVERT_RGB (zero);
+	    }
+	}
+
+	clx_sprmask = 0xFF;
+	clxdat = 0;
+
+	/* Clear the armed flags of all sprites.  */
+	memset (spr, 0, sizeof spr);
+	nr_armed = 0;
+
+	dmacon = intena = 0;
+
+	copcon = 0;
+	DSKLEN (0, 0);
+
+	bplcon0 = 0;
+	bplcon4 = 0x11; /* Get AGA chipset into ECS compatibility mode */
+	bplcon3 = 0xC00;
+
+	FMODE (0);
+	CLXCON (0);
+	lof = 0;
+    }
+
+    n_frames = 0;
+
+    mmu_set_tc(0);
+    expamem_reset ();
+
+    DISK_reset ();
+    CIA_reset ();
+    /* Drop every special flag except SPCFLAG_BRK and SPCFLAG_MODE_CHANGE.  */
+    unset_special (~(SPCFLAG_BRK | SPCFLAG_MODE_CHANGE));
+
+    vpos = 0;
+
+    if (needmousehack ()) {
+#if 0
+	mousehack_setfollow();
+#else
+	mousehack_setdontcare();
+#endif
+    } else {
+	mousestate = normal_mouse;
+    }
+    ievent_alive = 0;
+    timehack_alive = 0;
+
+    /* prev_sprite_entries == 0 tells init_hardware_for_drawing_frame that
+       this is the first frame after a reset.  */
+    curr_sprite_entries = 0;
+    prev_sprite_entries = 0;
+    sprite_entries[0][0].first_pixel = 0;
+    sprite_entries[1][0].first_pixel = MAX_SPR_PIXELS;
+    sprite_entries[0][1].first_pixel = 0;
+    sprite_entries[1][1].first_pixel = MAX_SPR_PIXELS;
+    memset (spixels, 0, sizeof spixels);
+    memset (&spixstate, 0, sizeof spixstate);
+
+    bltstate = BLT_done;
+    cop_state.state = COP_stop;
+    diwstate = DIW_waiting_start;
+    hdiwstate = DIW_waiting_start;
+    currcycle = 0;
+
+    new_beamcon0 = currprefs.ntscmode ? 0x00 : 0x20;
+    init_hz ();
+
+    audio_reset ();
+
+    init_sprites ();
+
+    init_hardware_frame ();
+    reset_drawing ();
+
+    reset_decisions ();
+
+#ifdef HAVE_GETTIMEOFDAY
+    /* Restart the frame-time measurement; vsync_handler skips the first
+       (bogus) frame.  */
+    gettimeofday (&tv, NULL);
+    seconds_base = tv.tv_sec;
+    bogusframe = 1;
+#endif
+
+    init_regtypes ();
+
+    sprite_buffer_res = currprefs.chipset_mask & CSMASK_AGA ? RES_HIRES : RES_LORES;
+    if (savestate_state == STATE_RESTORE) {
+	uae_u16 v;
+	uae_u32 vv;
+
+	update_adkmasks ();
+	INTENA (0);
+	INTREQ (0);
+#if 0
+	DMACON (0, 0);
+#endif
+	COPJMP1 (0);
+	if (diwhigh)
+	    diwhigh_written = 1;
+	/* Write 0 and then the saved value so that BPLCON0 sees a change.  */
+	v = bplcon0;
+	BPLCON0 (0, 0);
+	BPLCON0 (0, v);
+	FMODE (fmode);
+	/* Re-record every colour: the register is temporarily set to -1 so
+	   that record_color_change sees a value different from the one
+	   being restored.  */
+	if (!(currprefs.chipset_mask & CSMASK_AGA)) {
+	    for(i = 0 ; i < 32 ; i++)  {
+		vv = current_colors.color_regs_ecs[i];
+		current_colors.color_regs_ecs[i] = -1;
+		record_color_change (0, i, vv);
+		remembered_color_entry = -1;
+		current_colors.color_regs_ecs[i] = vv;
+		current_colors.acolors[i] = xcolors[vv];
+	    }
+	} else {
+	    for(i = 0 ; i < 256 ; i++)  {
+		vv = current_colors.color_regs_aga[i];
+		current_colors.color_regs_aga[i] = -1;
+		record_color_change (0, i, vv);
+		remembered_color_entry = -1;
+		current_colors.color_regs_aga[i] = vv;
+		current_colors.acolors[i] = CONVERT_RGB(vv);
+	    }
+	}
+	CLXCON (clxcon);
+	CLXCON2 (clxcon2);
+	calcdiw ();
+	write_log ("State restored\n");
+	dumpcustom ();
+	/* Recount the armed sprites from scratch.  The non-restore branch
+	   above (which zeroes nr_armed) is skipped while restoring, so
+	   without this reset a stale count would be added to.  */
+	nr_armed = 0;
+	for (i = 0; i < MAX_SPRITES; i++)
+	    nr_armed += spr[i].armed != 0;
+    }
+    expand_sprres ();
+}
+
+/* Write a summary of the main custom chip registers to the log, followed by
+   frame timing statistics once at least one frame has been timed.  */
+void dumpcustom (void)
+{
+    write_log ("DMACON: %x INTENA: %x INTREQ: %x VPOS: %x HPOS: %x\n",
+	       DMACONR(),
+	       (unsigned int)intena,
+	       (unsigned int)intreq,
+	       (unsigned int)vpos,
+	       (unsigned int)current_hpos());
+    write_log ("COP1LC: %08lx, COP2LC: %08lx\n",
+	       (unsigned long)cop1lc,
+	       (unsigned long)cop2lc);
+    write_log ("DIWSTRT: %04x DIWSTOP: %04x DDFSTRT: %04x DDFSTOP: %04x\n",
+	       (unsigned int)diwstrt,
+	       (unsigned int)diwstop,
+	       (unsigned int)ddfstrt,
+	       (unsigned int)ddfstop);
+
+    /* The average is only meaningful (and the division only safe) when
+       timeframes is nonzero.  */
+    if (timeframes != 0) {
+	write_log ("Average frame time: %d ms [frames: %d time: %d]\n",
+		   frametime / timeframes, timeframes, frametime);
+	if (total_skipped != 0)
+	    write_log ("Skipped frames: %d\n", total_skipped);
+    }
+    /*for (i=0; i<256; i++) if (blitcount[i]) write_log ("minterm %x = %d\n",i,blitcount[i]);  blitter debug */
+}
+
+/* Return the level (1..6) of the highest-priority interrupt that is both
+   requested (intreq) and enabled (intena), or -1 if there is none.  Nothing
+   is reported unless bit 0x4000 of intena is set as well.  */
+int intlev (void)
+{
+    uae_u16 pending = intreq & intena;
+
+    if (pending == 0 || (intena & 0x4000) == 0)
+	return -1;
+
+    /* Test from the highest level downwards.  */
+    if (pending & 0x2000)
+	return 6;
+    if (pending & 0x1800)
+	return 5;
+    if (pending & 0x0780)
+	return 4;
+    if (pending & 0x0070)
+	return 3;
+    if (pending & 0x0008)
+	return 2;
+    if (pending & 0x0007)
+	return 1;
+
+    /* Only bits outside the six level masks are pending.  */
+    return -1;
+}
+
+/* Build the lookup tables sprtaba, sprtabb, sprite_ab_merge (256 entries
+   each) and clxmask, sprclx (16 entries each).  */
+static void gen_custom_tables (void)
+{
+    int i;
+
+    for (i = 0; i < 256; i++) {
+	unsigned int spread = 0;
+	int bit;
+
+	/* Spread the 8 bits of i over the even bit positions of a 16-bit
+	   value in reversed order: bit 7 of i goes to bit 0, bit 6 to
+	   bit 2, ... and bit 0 to bit 14.  */
+	for (bit = 0; bit < 8; bit++)
+	    spread |= (unsigned int)((i >> bit) & 1) << (14 - 2 * bit);
+	sprtaba[i] = spread;
+	/* The same pattern shifted onto the odd bit positions.  */
+	sprtabb[i] = sprtaba[i] * 2;
+	/* Bit 0: any of the low four bits set; bit 1: any of the high four.  */
+	sprite_ab_merge[i] = (((i & 15) ? 1 : 0)
+			      | ((i & 240) ? 2 : 0));
+    }
+    for (i = 0; i < 16; i++) {
+	/* Each of the four bits of i selects 0xF (set) or 0x3 (clear) for
+	   the corresponding nibble.  */
+	clxmask[i] = (((i & 1) ? 0xF : 0x3)
+		      | ((i & 2) ? 0xF0 : 0x30)
+		      | ((i & 4) ? 0xF00 : 0x300)
+		      | ((i & 8) ? 0xF000 : 0x3000));
+	/* One result bit for each of the six pairs among the four bits of
+	   i, set when both bits of the pair are set; moved up by 9.  */
+	sprclx[i] = (((i & 0x3) == 0x3 ? 1 : 0)
+		     | ((i & 0x5) == 0x5 ? 2 : 0)
+		     | ((i & 0x9) == 0x9 ? 4 : 0)
+		     | ((i & 0x6) == 0x6 ? 8 : 0)
+		     | ((i & 0xA) == 0xA ? 16 : 0)
+		     | ((i & 0xC) == 0xC ? 32 : 0)) << 9;
+    }
+}
+
+/* One-time initialisation of the custom chip emulation: allocates the
+   change buffers (OS_WITHOUT_MEMORY_MANAGEMENT builds only), installs the
+   mousehack and timehack trap helpers at fixed offsets from RTAREA_BASE,
+   builds the lookup tables and initialises the drawing code.  */
+void custom_init (void)
+{
+    uaecptr saved_pos;
+
+#ifdef OS_WITHOUT_MEMORY_MANAGEMENT
+    int set;
+
+    /* One buffer of each kind per change set.  */
+    for (set = 0; set < 2; set++) {
+	sprite_entries[set] = xmalloc (max_sprite_entry * sizeof (struct sprite_entry));
+	color_changes[set] = xmalloc (max_color_change * sizeof (struct color_change));
+    }
+#endif
+
+    /* Remember the position reported by here () so it can be handed back
+       to org () once both helpers have been emitted.  */
+    saved_pos = here ();
+
+    org (RTAREA_BASE+0xFF70);
+    calltrap (deftrap (mousehack_helper));
+    dw (RTS);
+
+    org (RTAREA_BASE+0xFFA0);
+    calltrap (deftrap (timehack_helper));
+    dw (RTS);
+
+    org (saved_pos);
+
+    gen_custom_tables ();
+    build_blitfilltable ();
+
+    drawing_init ();
+
+    mousestate = unknown_mouse;
+    if (needmousehack ())
+	mousehack_setfollow ();
+
+    create_cycle_diagram_table ();
+}
+
+/* Custom chip memory bank */
+
+/* Accessors for long, word and byte reads and writes of the custom
+   register area; they are defined further down in this file.  */
+static uae_u32 custom_lget (uaecptr) REGPARAM;
+static uae_u32 custom_wget (uaecptr) REGPARAM;
+static uae_u32 custom_bget (uaecptr) REGPARAM;
+static void custom_lput (uaecptr, uae_u32) REGPARAM;
+static void custom_wput (uaecptr, uae_u32) REGPARAM;
+static void custom_bput (uaecptr, uae_u32) REGPARAM;
+
+/* Bank descriptor for the custom registers.  The initializer is positional:
+   the three read accessors, the three write accessors, then default_xlate,
+   default_check and NULL.  NOTE(review): the meaning of the last three
+   fields is defined by the addrbank declaration, which is not visible
+   here - confirm against it.  */
+addrbank custom_bank = {
+    custom_lget, custom_wget, custom_bget,
+    custom_lput, custom_wput, custom_bput,
+    default_xlate, default_check, NULL
+};
+
+/* Word read from the custom register at ADDR (only bits 0x1FE of the
+   address are decoded).  Sets S_READ in special_mem.
+   Registers listed in the switch return their current value, which is also
+   remembered in last_custom_value.  Any other offset returns the current
+   last_custom_value, writes that same value to the register through
+   custom_wput, and then sets last_custom_value to 0xffff.  */
+STATIC_INLINE uae_u32 REGPARAM2 custom_wget_1 (uaecptr addr)
+{
+    uae_u16 v;
+    special_mem |= S_READ;
+    switch (addr & 0x1FE) {
+     case 0x002: v = DMACONR (); break;
+     case 0x004: v = VPOSR (); break;
+     case 0x006: v = VHPOSR (); break;
+
+     case 0x008: v = DSKDATR (current_hpos ()); break;
+
+     case 0x00A: v = JOY0DAT (); break;
+     case 0x00C: v =  JOY1DAT (); break;
+     case 0x00E: v =  CLXDAT (); break;
+     case 0x010: v = ADKCONR (); break;
+
+     case 0x012: v = POT0DAT (); break;
+     case 0x016: v = POTGOR (); break;
+     case 0x018: v = SERDATR (); break;
+     case 0x01A: v = DSKBYTR (current_hpos ()); break;
+     case 0x01C: v = INTENAR (); break;
+     case 0x01E: v = INTREQR (); break;
+     case 0x07C: v = DENISEID (); break;
+
+     /* Colour registers 0..31; the index is derived from the low address
+	bits.  */
+     case 0x180: case 0x182: case 0x184: case 0x186: case 0x188: case 0x18A:
+     case 0x18C: case 0x18E: case 0x190: case 0x192: case 0x194: case 0x196:
+     case 0x198: case 0x19A: case 0x19C: case 0x19E: case 0x1A0: case 0x1A2:
+     case 0x1A4: case 0x1A6: case 0x1A8: case 0x1AA: case 0x1AC: case 0x1AE:
+     case 0x1B0: case 0x1B2: case 0x1B4: case 0x1B6: case 0x1B8: case 0x1BA:
+     case 0x1BC: case 0x1BE:
+	v = COLOR_READ ((addr & 0x3E) / 2);
+	break;
+
+     /* Not a register handled above: the read returns the last value seen
+	and has the side effect of writing it to the register.  */
+     default:
+       v = last_custom_value;
+       custom_wput (addr, v);
+       last_custom_value = 0xffff;
+       return v;
+    }
+    last_custom_value = v;
+    return v;
+}
+
+/* Word read of a custom register: first let the copper catch up with the
+   current horizontal position, then perform the read.  */
+uae_u32 REGPARAM2 custom_wget (uaecptr addr)
+{
+    int hpos = current_hpos ();
+
+    sync_copper_with_cpu (hpos, 1, addr);
+    return custom_wget_1 (addr);
+}
+
+/* Byte read of a custom register, done as a word read of the containing
+   even address.  For an even ADDR the word is shifted right by 8; for an
+   odd ADDR the word is returned unshifted (it is not masked to 8 bits
+   here).  */
+uae_u32 REGPARAM2 custom_bget (uaecptr addr)
+{
+    uae_u32 word;
+
+    special_mem |= S_READ;
+    word = custom_wget (addr & 0xfffe);
+    if (addr & 1)
+	return word;
+    return word >> 8;
+}
+
+/* Long read of the custom register area, performed as two word reads: the
+   word at ADDR supplies the upper 16 bits of the result and the word at
+   ADDR + 2 the lower 16 bits.  */
+uae_u32 REGPARAM2 custom_lget (uaecptr addr)
+{
+    uae_u32 hi, lo;
+
+    special_mem |= S_READ;
+    /* Two separate statements, high word first: custom_wget has side
+       effects (copper sync, last_custom_value), and C leaves the order in
+       which the operands of a single '|' are evaluated unspecified.  */
+    hi = custom_wget (addr & 0xfffe);
+    lo = custom_wget ((addr + 2) & 0xfffe);
+    return (hi << 16) | lo;
+}
+
+void REGPARAM2 custom_wput_1 (int hpos, uaecptr addr, uae_u32 value)
+{
+    addr &= 0x1FE;
+    last_custom_value = value;
+    switch (addr) {
+     case 0x020: DSKPTH (value); break;
+     case 0x022: DSKPTL (value); break;
+     case 0x024: DSKLEN (value, hpos); break;
+     case 0x026: DSKDAT (value); break;
+
+     case 0x02A: VPOSW (value); break;
+     case 0x02E: COPCON (value); break;
+     case 0x030: SERDAT (value); break;
+     case 0x032: SERPER (value); break;
+     case 0x034: POTGO (value); break;
+     case 0x040: BLTCON0 (value); break;
+     case 0x042: BLTCON1 (value); break;
+
+     case 0x044: BLTAFWM (value); break;
+     case 0x046: BLTALWM (value); break;
+
+     case 0x050: BLTAPTH (value); break;
+     case 0x052: BLTAPTL (value); break;
+     case 0x04C: BLTBPTH (value); break;
+     case 0x04E: BLTBPTL (value); break;
+     case 0x048: BLTCPTH (value); break;
+     case 0x04A: BLTCPTL (value); break;
+     case 0x054: BLTDPTH (value); break;
+     case 0x056: BLTDPTL (value); break;
+
+     case 0x058: BLTSIZE (value); break;
+
+     case 0x064: BLTAMOD (value); break;
+     case 0x062: BLTBMOD (value); break;
+     case 0x060: BLTCMOD (value); break;
+     case 0x066: BLTDMOD (value); break;
+
+     case 0x070: BLTCDAT (value); break;
+     case 0x072: BLTBDAT (value); break;
+     case 0x074: BLTADAT (value); break;
+
+     case 0x07E: DSKSYNC (value); break;
+
+     case 0x080: COP1LCH (value); break;
+     case 0x082: COP1LCL (value); break;
+     case 0x084: COP2LCH (value); break;
+     case 0x086: COP2LCL (value); break;
+
+     case 0x088: COPJMP1 (value); break;
+     case 0x08A: COPJMP2 (value); break;
+
+     case 0x08E: DIWSTRT (hpos, value); break;
+     case 0x090: DIWSTOP (hpos, value); break;
+     case 0x092: DDFSTRT (hpos, value); break;
+     case 0x094: DDFSTOP (hpos, value); break;
+
+     case 0x096: DMACON (hpos, value); break;
+     case 0x098: CLXCON (value); break;
+     case 0x09A: INTENA (value); break;
+     case 0x09C: INTREQ (value); break;
+     case 0x09E: ADKCON (value); break;
+
+     case 0x0A0: AUDxLCH (0, value); break;
+     case 0x0A2: AUDxLCL (0, value); break;
+     case 0x0A4: AUDxLEN (0, value); break;
+     case 0x0A6: AUDxPER (0, value); break;
+     case 0x0A8: AUDxVOL (0, value); break;
+     case 0x0AA: AUDxDAT (0, value); break;
+
+     case 0x0B0: AUDxLCH (1, value); break;
+     case 0x0B2: AUDxLCL (1, value); break;
+     case 0x0B4: AUDxLEN (1, value); break;
+     case 0x0B6: AUDxPER (1, value); break;
+     case 0x0B8: AUDxVOL (1, value); break;
+     case 0x0BA: AUDxDAT (1, value); break;
+
+     case 0x0C0: AUDxLCH (2, value); break;
+     case 0x0C2: AUDxLCL (2, value); break;
+     case 0x0C4: AUDxLEN (2, value); break;
+     case 0x0C6: AUDxPER (2, value); break;
+     case 0x0C8: AUDxVOL (2, value); break;
+     case 0x0CA: AUDxDAT (2, value); break;
+
+     case 0x0D0: AUDxLCH (3, value); break;
+     case 0x0D2: AUDxLCL (3, value); break;
+     case 0x0D4: AUDxLEN (3, value); break;
+     case 0x0D6: AUDxPER (3, value); break;
+     case 0x0D8: AUDxVOL (3, value); break;
+     case 0x0DA: AUDxDAT (3, value); break;
+
+     case 0x0E0: BPLPTH (hpos, value, 0); break;
+     case 0x0E2: BPLPTL (hpos, value, 0); break;
+     case 0x0E4: BPLPTH (hpos, value, 1); break;
+     case 0x0E6: BPLPTL (hpos, value, 1); break;
+     case 0x0E8: BPLPTH (hpos, value, 2); break;
+     case 0x0EA: BPLPTL (hpos, value, 2); break;
+     case 0x0EC: BPLPTH (hpos, value, 3); break;
+     case 0x0EE: BPLPTL (hpos, value, 3); break;
+     case 0x0F0: BPLPTH (hpos, value, 4); break;
+     case 0x0F2: BPLPTL (hpos, value, 4); break;
+     case 0x0F4: BPLPTH (hpos, value, 5); break;
+     case 0x0F6: BPLPTL (hpos, value, 5); break;
+     case 0x0F8: BPLPTH (hpos, value, 6); break;
+     case 0x0FA: BPLPTL (hpos, value, 6); break;
+     case 0x0FC: BPLPTH (hpos, value, 7); break;
+     case 0x0FE: BPLPTL (hpos, value, 7); break;
+
+     case 0x100: BPLCON0 (hpos, value); break;
+     case 0x102: BPLCON1 (hpos, value); break;
+     case 0x104: BPLCON2 (hpos, value); break;
+     case 0x106: BPLCON3 (hpos, value); break;
+
+     case 0x108: BPL1MOD (hpos, value); break;
+     case 0x10A: BPL2MOD (hpos, value); break;
+     case 0x10E: CLXCON2 (value); break;
+
+     case 0x110: BPL1DAT (hpos, value); break;
+     case 0x112: BPL2DAT (value); break;
+     case 0x114: BPL3DAT (value); break;
+     case 0x116: BPL4DAT (value); break;
+     case 0x118: BPL5DAT (value); break;
+     case 0x11A: BPL6DAT (value); break;
+     case 0x11C: BPL7DAT (value); break;
+     case 0x11E: BPL8DAT (value); break;
+
+     case 0x180: case 0x182: case 0x184: case 0x186: case 0x188: case 0x18A:
+     case 0x18C: case 0x18E: case 0x190: case 0x192: case 0x194: case 0x196:
+     case 0x198: case 0x19A: case 0x19C: case 0x19E: case 0x1A0: case 0x1A2:
+     case 0x1A4: case 0x1A6: case 0x1A8: case 0x1AA: case 0x1AC: case 0x1AE:
+     case 0x1B0: case 0x1B2: case 0x1B4: case 0x1B6: case 0x1B8: case 0x1BA:
+     case 0x1BC: case 0x1BE:
+	COLOR_WRITE (hpos, value & 0xFFF, (addr & 0x3E) / 2);
+	break;
+     case 0x120: case 0x124: case 0x128: case 0x12C:
+     case 0x130: case 0x134: case 0x138: case 0x13C:
+	SPRxPTH (hpos, value, (addr - 0x120) / 4);
+	break;
+     case 0x122: case 0x126: case 0x12A: case 0x12E:
+     case 0x132: case 0x136: case 0x13A: case 0x13E:
+	SPRxPTL (hpos, value, (addr - 0x122) / 4);
+	break;
+     case 0x140: case 0x148: case 0x150: case 0x158:
+     case 0x160: case 0x168: case 0x170: case 0x178:
+	SPRxPOS (hpos, value, (addr - 0x140) / 8);
+	break;
+     case 0x142: case 0x14A: case 0x152: case 0x15A:
+     case 0x162: case 0x16A: case 0x172: case 0x17A:
+	SPRxCTL (hpos, value, (addr - 0x142) / 8);
+	break;
+     case 0x144: case 0x14C: case 0x154: case 0x15C:
+     case 0x164: case 0x16C: case 0x174: case 0x17C:
+	SPRxDATA (hpos, value, (addr - 0x144) / 8);
+	break;
+     case 0x146: case 0x14E: case 0x156: case 0x15E:
+     case 0x166: case 0x16E: case 0x176: case 0x17E:
+	SPRxDATB (hpos, value, (addr - 0x146) / 8);
+	break;
+
+     case 0x36: JOYTEST (value); break;
+     case 0x5A: BLTCON0L (value); break;
+     case 0x5C: BLTSIZV (value); break;
+     case 0x5E: BLTSIZH (value); break;
+     case 0x1E4: DIWHIGH (hpos, value); break;
+     case 0x10C: BPLCON4 (hpos, value); break;
+     case 0x1FC: FMODE (value); break;
+    }
+}
+
+void REGPARAM2 custom_wput (uaecptr addr, uae_u32 value)
+{
+    int hpos = current_hpos ();
+    special_mem |= S_WRITE;
+    /* Word write: call sync_copper_with_cpu for this position, then dispatch to custom_wput_1. */
+    sync_copper_with_cpu (hpos, 1, addr);
+    custom_wput_1 (hpos, addr, value);
+}
+
+void REGPARAM2 custom_bput (uaecptr addr, uae_u32 value)
+{
+    static int warned = 0;	/* set to 1 once a byte write has been logged */
+    /* Is this correct now? (There are people who bput things to the upper byte of AUDxVOL). */
+    uae_u16 rval = (value << 8) | (value & 0xFF);	/* low byte mirrored into both halves */
+    special_mem |= S_WRITE;
+    custom_wput (addr, rval);
+    if (!warned || ((addr & 0xff0000) == 0xda0000)) {	/* first byte write, or address bits 16-23 == 0xda */
+	write_log ("Byte put to custom register (addr=%lx val=%lx)\n", (unsigned long) addr, (unsigned long) value);
+	warned = 1;	/* the casts above make the arguments match %lx */
+    }
+}
+
+void REGPARAM2 custom_lput(uaecptr addr, uae_u32 value)
+{   /* Long write, carried out as two word writes through custom_wput. */
+    special_mem |= S_WRITE;
+    custom_wput (addr & 0xfffe, value >> 16);	/* upper 16 bits first */
+    custom_wput ((addr + 2) & 0xfffe, (uae_u16)value);	/* then the lower 16 bits at addr + 2 */
+}
+
+void custom_prepare_savestate (void)
+{
+    unsigned int saved_dmacon = dmacon;
+    /* Full blitter state cannot be saved yet: if a blitter event is pending, finish it now. */
+    if (!eventtab[ev_blitter].active)
+	return;
+    dmacon = saved_dmacon | DMA_BLITTER; /* ugh.. set the blitter DMA bit for the handler call */
+    blitter_handler ();
+    dmacon = saved_dmacon;
+}
+
+#define RB restore_u8 ()	/* shorthand for restore_u8 (), used by the restore_* functions */
+#define RW restore_u16 ()	/* shorthand for restore_u16 () */
+#define RL restore_u32 ()	/* shorthand for restore_u32 (); NOTE(review): presumably all three read via the caller's `src` -- confirm */
+
+uae_u8 *restore_custom (uae_u8 *src)
+{
+    uae_u16 dsklen, dskbytr, dskdatr;
+    int dskpt;
+    int i;
+    /* Restores custom-chip state from saved data and returns `src` (presumably advanced by the R* macros -- confirm). */
+    audio_reset ();
+    /* First the chipset mask, then one value per register in the order commented below; a bare RW reads and discards. */
+    currprefs.chipset_mask = RL;
+    RW;				/* 000 ? */
+    RW;				/* 002 DMACONR */
+    RW;				/* 004 VPOSR */
+    RW;				/* 006 VHPOSR */
+    dskdatr = RW;		/* 008 DSKDATR */
+    RW;				/* 00A JOY0DAT */
+    RW;				/* 00C JOY1DAT */
+    clxdat = RW;		/* 00E CLXDAT */
+    RW;				/* 010 ADKCONR */
+    RW;				/* 012 POT0DAT* */
+    RW;				/* 014 POT1DAT* */
+    RW;				/* 016 POTINP* */
+    RW;				/* 018 SERDATR* */
+    dskbytr = RW;		/* 01A DSKBYTR */
+    RW;				/* 01C INTENAR */
+    RW;				/* 01E INTREQR */
+    dskpt = RL;			/* 020-022 DSKPT */
+    dsklen = RW;		/* 024 DSKLEN */
+    RW;				/* 026 DSKDAT */
+    RW;				/* 028 REFPTR */
+    lof = RW;			/* 02A VPOSW */
+    RW;				/* 02C VHPOSW */
+    COPCON(RW);			/* 02E COPCON */
+    RW;				/* 030 SERDAT* */
+    RW;				/* 032 SERPER* */
+    POTGO(RW);			/* 034 POTGO */
+    RW;				/* 036 JOYTEST* */
+    RW;				/* 038 STREQU */
+    RW;				/* 03A STRVBL */
+    RW;				/* 03C STRHOR */
+    RW;				/* 03E STRLONG */
+    BLTCON0(RW);		/* 040 BLTCON0 */
+    BLTCON1(RW);		/* 042 BLTCON1 */
+    BLTAFWM(RW);		/* 044 BLTAFWM */
+    BLTALWM(RW);		/* 046 BLTALWM */
+    BLTCPTH(RL);		/* 048-04B BLTCPT; NOTE(review): a 32-bit RL value is passed to a *PTH setter here and on the next three lines -- confirm the setter keeps all 32 bits */
+    BLTBPTH(RL);		/* 04C-04F BLTBPT */
+    BLTAPTH(RL);		/* 050-053 BLTAPT */
+    BLTDPTH(RL);		/* 054-057 BLTDPT */
+    RW;				/* 058 BLTSIZE */
+    RW;				/* 05A BLTCON0L */
+    oldvblts = RW;		/* 05C BLTSIZV */
+    RW;				/* 05E BLTSIZH */
+    BLTCMOD(RW);		/* 060 BLTCMOD */
+    BLTBMOD(RW);		/* 062 BLTBMOD */
+    BLTAMOD(RW);		/* 064 BLTAMOD */
+    BLTDMOD(RW);		/* 066 BLTDMOD */
+    RW;				/* 068 ? */
+    RW;				/* 06A ? */
+    RW;				/* 06C ? */
+    RW;				/* 06E ? */
+    BLTCDAT(RW);		/* 070 BLTCDAT */
+    BLTBDAT(RW);		/* 072 BLTBDAT */
+    BLTADAT(RW);		/* 074 BLTADAT */
+    RW;				/* 076 ? */
+    RW;				/* 078 ? */
+    RW;				/* 07A ? */
+    RW;				/* 07C LISAID */
+    DSKSYNC(RW);		/* 07E DSKSYNC */
+    cop1lc = RL;		/* 080/082 COP1LC */
+    cop2lc = RL;		/* 084/086 COP2LC */
+    RW;				/* 088 ? */
+    RW;				/* 08A ? */
+    RW;				/* 08C ? */
+    diwstrt = RW;		/* 08E DIWSTRT */
+    diwstop = RW;		/* 090 DIWSTOP */
+    ddfstrt = RW;		/* 092 DDFSTRT */
+    ddfstop = RW;		/* 094 DDFSTOP */
+    dmacon = RW & ~(0x2000|0x4000); /* 096 DMACON, with bits 0x2000 and 0x4000 cleared */
+    CLXCON(RW);			/* 098 CLXCON */
+    intena = RW;		/* 09A INTENA */
+    intreq = RW;		/* 09C INTREQ */
+    adkcon = RW;		/* 09E ADKCON */
+    for (i = 0; i < 8; i++)
+	bplpt[i] = RL;		/* 0E0-0FE BPLxPT */
+    bplcon0 = RW;		/* 100 BPLCON0 */
+    bplcon1 = RW;		/* 102 BPLCON1 */
+    bplcon2 = RW;		/* 104 BPLCON2 */
+    bplcon3 = RW;		/* 106 BPLCON3 */
+    bpl1mod = RW;		/* 108 BPL1MOD */
+    bpl2mod = RW;		/* 10A BPL2MOD */
+    bplcon4 = RW;		/* 10C BPLCON4 */
+    clxcon2 = RW;		/* 10E CLXCON2* */
+    for(i = 0; i < 8; i++)
+	RW;			/*     BPLXDAT */
+    for(i = 0; i < 32; i++)
+	current_colors.color_regs_ecs[i] = RW; /* 180 COLORxx */
+    RW;				/* 1C0 ? */
+    RW;				/* 1C2 ? */
+    RW;				/* 1C4 ? */
+    RW;				/* 1C6 ? */
+    RW;				/* 1C8 ? */
+    RW;				/* 1CA ? */
+    RW;				/* 1CC ? */
+    RW;				/* 1CE ? */
+    RW;				/* 1D0 ? */
+    RW;				/* 1D2 ? */
+    RW;				/* 1D4 ? */
+    RW;				/* 1D6 ? */
+    RW;				/* 1D8 ? */
+    RW;				/* 1DA ? */
+    new_beamcon0 = RW;		/* 1DC BEAMCON0 */
+    RW;				/* 1DE ? */
+    RW;				/* 1E0 ? */
+    RW;				/* 1E2 ? */
+    RW;				/* 1E4 ? */
+    RW;				/* 1E6 ? */
+    RW;				/* 1E8 ? */
+    RW;				/* 1EA ? */
+    RW;				/* 1EC ? */
+    RW;				/* 1EE ? */
+    RW;				/* 1F0 ? */
+    RW;				/* 1F2 ? */
+    RW;				/* 1F4 ? */
+    RW;				/* 1F6 ? */
+    RW;				/* 1F8 ? */
+    RW;				/* 1FA ? */
+    fmode = RW;			/* 1FC FMODE */
+    RW;				/* 1FE ? */
+    /* The disk values collected above are handed over in a single call. */
+    DISK_restore_custom (dskpt, dsklen, dskdatr, dskbytr);
+
+    return src;
+}
+
+
+#define SB save_u8	/* shorthand for save_u8, used by the save_* functions */
+#define SW save_u16	/* shorthand for save_u16 */
+#define SL save_u32	/* shorthand for save_u32; NOTE(review): presumably all three write via the caller's `dst` -- confirm */
+
+extern uae_u16 serper;
+
+uae_u8 *save_custom (int *len)
+{
+    uae_u8 *dstbak, *dst;
+    int i;
+    uae_u32 dskpt;
+    uae_u16 dsklen, dsksync, dskdatr, dskbytr;
+    /* Writes the custom-chip state into a freshly malloc'ed buffer; returns it and stores its length in *len. */
+    DISK_save_custom (&dskpt, &dsklen, &dsksync, &dskdatr, &dskbytr);
+    dstbak = dst = malloc (8+256*2);	/* NOTE(review): result is not checked for NULL */
+    SL (currprefs.chipset_mask);
+    SW (0);			/* 000 ? */
+    SW (dmacon);		/* 002 DMACONR */
+    SW (VPOSR());		/* 004 VPOSR */
+    SW (VHPOSR());		/* 006 VHPOSR */
+    SW (dskdatr);		/* 008 DSKDATR */
+    SW (JOY0DAT());		/* 00A JOY0DAT */
+    SW (JOY1DAT());		/* 00C JOY1DAT */
+    SW (clxdat);		/* 00E CLXDAT */
+    SW (ADKCONR());		/* 010 ADKCONR */
+    SW (POT0DAT());		/* 012 POT0DAT */
+    SW (POT0DAT());		/* 014 POT1DAT; NOTE(review): calls POT0DAT() a second time -- confirm intended */
+    SW (0);			/* 016 POTINP * */
+    SW (0);			/* 018 SERDATR * */
+    SW (dskbytr);		/* 01A DSKBYTR */
+    SW (INTENAR());		/* 01C INTENAR */
+    SW (INTREQR());		/* 01E INTREQR */
+    SL (dskpt);			/* 020-023 DSKPT */
+    SW (dsklen);		/* 024 DSKLEN */
+    SW (0);			/* 026 DSKDAT */
+    SW (0);			/* 028 REFPTR */
+    SW (lof);			/* 02A VPOSW */
+    SW (0);			/* 02C VHPOSW */
+    SW (copcon);		/* 02E COPCON */
+    SW (serdat);		/* 030 SERDAT * (was written with serper) */
+    SW (serper);		/* 032 SERPER * (was written with serdat) */
+    SW (potgo_value);		/* 034 POTGO */
+    SW (0);			/* 036 JOYTEST * */
+    SW (0);			/* 038 STREQU */
+    SW (0);			/* 03A STRVBL */
+    SW (0);			/* 03C STRHOR */
+    SW (0);			/* 03E STRLONG */
+    SW (bltcon0);		/* 040 BLTCON0 */
+    SW (bltcon1);		/* 042 BLTCON1 */
+    SW (blt_info.bltafwm);	/* 044 BLTAFWM */
+    SW (blt_info.bltalwm);	/* 046 BLTALWM */
+    SL (bltcpt);		/* 048-04B BLTCPT */
+    SL (bltbpt);		/* 04C-04F BLTBPT */
+    SL (bltapt);		/* 050-053 BLTAPT */
+    SL (bltdpt);		/* 054-057 BLTDPT */
+    SW (0);			/* 058 BLTSIZE */
+    SW (0);			/* 05A BLTCON0L (use BLTCON0 instead) */
+    SW (oldvblts);		/* 05C BLTSIZV */
+    SW (blt_info.hblitsize);	/* 05E BLTSIZH */
+    SW (blt_info.bltcmod);	/* 060 BLTCMOD */
+    SW (blt_info.bltbmod);	/* 062 BLTBMOD */
+    SW (blt_info.bltamod);	/* 064 BLTAMOD */
+    SW (blt_info.bltdmod);	/* 066 BLTDMOD */
+    SW (0);			/* 068 ? */
+    SW (0);			/* 06A ? */
+    SW (0);			/* 06C ? */
+    SW (0);			/* 06E ? */
+    SW (blt_info.bltcdat);	/* 070 BLTCDAT */
+    SW (blt_info.bltbdat);	/* 072 BLTBDAT */
+    SW (blt_info.bltadat);	/* 074 BLTADAT */
+    SW (0);			/* 076 ? */
+    SW (0);			/* 078 ? */
+    SW (0);			/* 07A ? */
+    SW (DENISEID());		/* 07C DENISEID/LISAID */
+    SW (dsksync);		/* 07E DSKSYNC */
+    SL (cop1lc);		/* 080-083 COP1LC */
+    SL (cop2lc);		/* 084-087 COP2LC */
+    SW (0);			/* 088 ? */
+    SW (0);			/* 08A ? */
+    SW (0);			/* 08C ? */
+    SW (diwstrt);		/* 08E DIWSTRT */
+    SW (diwstop);		/* 090 DIWSTOP */
+    SW (ddfstrt);		/* 092 DDFSTRT */
+    SW (ddfstop);		/* 094 DDFSTOP */
+    SW (dmacon);		/* 096 DMACON */
+    SW (clxcon);		/* 098 CLXCON */
+    SW (intena);		/* 09A INTENA */
+    SW (intreq);		/* 09C INTREQ */
+    SW (adkcon);		/* 09E ADKCON */
+    for (i = 0; i < 8; i++)
+	SL (bplpt[i]);		/* 0E0-0FE BPLxPT */
+    SW (bplcon0);		/* 100 BPLCON0 */
+    SW (bplcon1);		/* 102 BPLCON1 */
+    SW (bplcon2);		/* 104 BPLCON2 */
+    SW (bplcon3);		/* 106 BPLCON3 */
+    SW (bpl1mod);		/* 108 BPL1MOD */
+    SW (bpl2mod);		/* 10A BPL2MOD */
+    SW (bplcon4);		/* 10C BPLCON4 */
+    SW (clxcon2);		/* 10E CLXCON2 */
+    for (i = 0;i < 8; i++)
+	SW (0);			/* 110 BPLxDAT */
+    for ( i = 0; i < 32; i++)
+	SW (current_colors.color_regs_ecs[i]); /* 180-1BE COLORxx */
+    SW (0);			/* 1C0 */
+    SW (0);			/* 1C2 */
+    SW (0);			/* 1C4 */
+    SW (0);			/* 1C6 */
+    SW (0);			/* 1C8 */
+    SW (0);			/* 1CA */
+    SW (0);			/* 1CC */
+    SW (0);			/* 1CE */
+    SW (0);			/* 1D0 */
+    SW (0);			/* 1D2 */
+    SW (0);			/* 1D4 */
+    SW (0);			/* 1D6 */
+    SW (0);			/* 1D8 */
+    SW (0);			/* 1DA */
+    SW (beamcon0);		/* 1DC BEAMCON0 */
+    SW (0);			/* 1DE */
+    SW (0);			/* 1E0 */
+    SW (0);			/* 1E2 */
+    SW (0);			/* 1E4 */
+    SW (0);			/* 1E6 */
+    SW (0);			/* 1E8 */
+    SW (0);			/* 1EA */
+    SW (0);			/* 1EC */
+    SW (0);			/* 1EE */
+    SW (0);			/* 1F0 */
+    SW (0);			/* 1F2 */
+    SW (0);			/* 1F4 */
+    SW (0);			/* 1F6 */
+    SW (0);			/* 1F8 */
+    SW (0);			/* 1FA */
+    SW (fmode);			/* 1FC FMODE */
+    SW (0xffff);		/* 1FE */
+
+    *len = dst - dstbak;
+    return dstbak;
+}
+
+uae_u8 *restore_custom_agacolors (uae_u8 *src)
+{
+    int entry;
+    /* Fill all 256 AGA colour registers in index order, one RL value each. */
+    for (entry = 0; entry != 256; ++entry)
+	current_colors.color_regs_aga[entry] = RL;
+    return src;
+}
+
+uae_u8 *save_custom_agacolors (int *len)
+{
+    uae_u8 *dst = malloc (256*4);	/* must be named `dst`: presumably what the S* macros write through */
+    uae_u8 *const start = dst;
+    int entry;
+    /* Emit the 256 AGA colour registers in index order, one SL call each. */
+    for (entry = 0; entry != 256; ++entry)
+	SL (current_colors.color_regs_aga[entry]);
+    *len = dst - start;
+    return start;
+}
+
+uae_u8 *restore_custom_sprite (uae_u8 *src, int num)
+{
+    int slot;
+
+    /* Read order: pointer, position, control, four DATA/DATB word pairs,
+       then one byte for the armed flag. */
+    spr[num].pt = RL;		/* 120-13E SPRxPT */
+    sprpos[num] = RW;		/* 1x0 SPRxPOS */
+    sprctl[num] = RW;		/* 1x2 SPRxCTL */
+    for (slot = 0; slot < 4; slot++) {
+	sprdata[num][slot] = RW;	/* 1x4 SPRxDATA for slot 0 */
+	sprdatb[num][slot] = RW;	/* 1x6 SPRxDATB for slot 0 */
+    }
+    spr[num].armed = RB;
+    return src;
+}
+
+uae_u8 *save_custom_sprite(int *len, int num)
+{
+    uae_u8 *dst = malloc (25);
+    uae_u8 *const start = dst;
+    int slot;
+
+    /* 25 bytes: one long, ten words and one byte.
+       Write order: pointer, position, control, four DATA/DATB word pairs,
+       then the armed flag as 0 or 1. */
+    SL (spr[num].pt);		/* 120-13E SPRxPT */
+    SW (sprpos[num]);		/* 1x0 SPRxPOS */
+    SW (sprctl[num]);		/* 1x2 SPRxCTL */
+    for (slot = 0; slot < 4; slot++) {
+	SW (sprdata[num][slot]);	/* 1x4 SPRxDATA for slot 0 */
+	SW (sprdatb[num][slot]);	/* 1x6 SPRxDATB for slot 0 */
+    }
+    SB (spr[num].armed ? 1 : 0);
+    *len = dst - start;
+    return start;
+}
diff -urN src-0.8.22/src/debug.c src-0.8.22-mmu/src/debug.c
--- src-0.8.22/src/debug.c	2001-10-24 12:59:20.000000000 +0200
+++ src-0.8.22-mmu/src/debug.c	2003-07-25 12:13:43.000000000 +0200
@@ -680,6 +680,9 @@
 		    printf ("Plane %d offset %d\n", i, bpl_off[i]);
 	    }
 	    break;
+	case 'u':
+	    mmu_dump_tables();
+	    break;
 	case 'h':
 	case '?':
 	{
@@ -704,6 +707,7 @@
 	    printf ("  W <address> <value>:  Write into Amiga memory\n");
 	    printf ("  S <file> <addr> <n>:  Save a block of Amiga memory\n");
 	    printf ("  T:                    Show exec tasks and their PCs\n");
+	    printf ("  u:                    Dump the MMU translation tables and state\n");
 	    printf ("  h,?:                  Show this help page\n");
 	    printf ("  q:                    Quit the emulator. You don't want to use this command.\n\n");
 	}
diff -urN src-0.8.22/src/debug.c~ src-0.8.22-mmu/src/debug.c~
--- src-0.8.22/src/debug.c~	1970-01-01 01:00:00.000000000 +0100
+++ src-0.8.22-mmu/src/debug.c~	2003-07-25 12:11:11.000000000 +0200
@@ -0,0 +1,717 @@
+ /*
+  * UAE - The Un*x Amiga Emulator
+  *
+  * Debugger
+  *
+  * (c) 1995 Bernd Schmidt
+  *
+  */
+
+#include "sysconfig.h"
+#include "sysdeps.h"
+
+#include <ctype.h>
+#include <signal.h>
+
+#include "config.h"
+#include "options.h"
+#include "threaddep/thread.h"
+#include "uae.h"
+#include "memory.h"
+#include "custom.h"
+#include "newcpu.h"
+#include "debug.h"
+#include "cia.h"
+#include "xwin.h"
+#include "gui.h"
+#include "identify.h"
+
+static int debugger_active = 0;
+static uaecptr skipaddr;
+static int do_skip;
+int debugging = 0;
+
+static FILE *logfile;
+
+void activate_debugger (void)
+{
+    if (logfile != NULL) {	/* close an open trace log */
+	fclose (logfile);
+	logfile = NULL;
+    }
+    do_skip = 0;		/* cancel any pending skip */
+    if (!debugger_active) {	/* only when not already active */
+	debugger_active = 1;
+	set_special (SPCFLAG_BRK);
+	debugging = 1;
+    }
+}
+
+int firsthist = 0;
+int lasthist = 0;
+#ifdef NEED_TO_DEBUG_BADLY
+struct regstruct history[MAX_HIST];
+union flagu historyf[MAX_HIST];
+#else
+uaecptr history[MAX_HIST];
+#endif
+
+static void ignore_ws (char **c)
+{   /* Advance *c past leading whitespace; stops at the first non-blank character or the NUL. */
+    while (**c && isspace((unsigned char) **c)) (*c)++;	/* cast: <ctype.h> arguments must be unsigned char values */
+}
+
+static uae_u32 readhex (char **c)
+{
+    uae_u32 val = 0;
+    char nc;
+    /* Skip whitespace, then parse hex digits (either case); *c ends on the first non-hex character. */
+    ignore_ws (c);
+
+    while (isxdigit((unsigned char) (nc = **c))) {	/* casts: <ctype.h> needs unsigned char values */
+	(*c)++;
+	val *= 16;
+	nc = toupper((unsigned char) nc);
+	if (isdigit((unsigned char) nc)) {
+	    val += nc - '0';
+	} else {
+	    val += nc - 'A' + 10;
+	}
+    }
+    return val;
+}
+
+static uae_u32 readint (char **c)
+{
+    uae_u32 val = 0;
+    char nc;
+    int negative = 0;
+    /* Skip whitespace, accept an optional '-', then parse decimal digits. */
+    ignore_ws (c);
+
+    if (**c == '-')
+	negative = 1, (*c)++;
+    while (isdigit((unsigned char) (nc = **c))) {	/* cast: <ctype.h> needs an unsigned char value */
+	(*c)++;
+	val *= 10;
+	val += nc - '0';
+    }
+    return val * (negative ? -1 : 1);	/* negation is carried out in uae_u32 arithmetic */
+}
+
+static char next_char( char **c)
+{   /* Skip whitespace and return the next character, consuming it unless it is the terminating NUL. */
+    ignore_ws (c);
+    return **c ? *(*c)++ : '\0';	/* never step past the end of the string */
+}
+
+static int more_params (char **c)
+{   /* After skipping whitespace: 1 if anything is left in the string, otherwise 0. */
+    ignore_ws (c);
+    return **c == '\0' ? 0 : 1;
+}
+
+static void dumpmem (uaecptr addr, uaecptr *nxmem, int lines)
+{   /* Print `lines` rows of 16 words starting at addr; the address after the last word goes to *nxmem. */
+    broken_in = 0;
+    for (;lines-- && !broken_in;) {	/* stops early once broken_in becomes non-zero */
+	int i;
+	printf ("%08lx ", (unsigned long) addr);	/* cast so the argument matches %lx */
+	for (i = 0; i < 16; i++) {
+	    printf ("%04x ", (unsigned int) get_word(addr)); addr += 2;
+	}
+	printf ("\n");
+    }
+    *nxmem = addr;
+}
+
+static void foundmod (uae_u32 ptr, char *type)
+{
+    char name[21];
+    uae_u8 *ptr2 = chipmemory + ptr;
+    int i,length;
+    /* Report a candidate module at chip-memory offset ptr: its 20-byte name and an estimated length. */
+    printf ("Found possible %s module at 0x%lx.\n", type, (unsigned long) ptr);
+    memcpy (name, ptr2, 20);
+    name[20] = '\0';
+
+    /* Browse playlist */
+    length = 0;
+    for (i = 0x3b8; i < 0x438; i++)
+	if (ptr2[i] > length)
+	    length = ptr2[i];
+
+    length = (length+1)*1024 + 0x43c;
+
+    /* Add sample lengths */
+    ptr2 += 0x2A;
+    for (i = 0; i < 31; i++, ptr2 += 30)
+	length += 2*((ptr2[0]<<8)+ptr2[1]);
+
+    printf ("Name \"%s\", Length 0x%lx bytes.\n", name, (unsigned long) length);	/* casts make the arguments match %lx */
+}
+
+static void modulesearch (void)
+{
+    uae_u8 *p = get_real_address (0);
+    uae_u32 ptr;
+    /* Walk chip memory two bytes at a time looking for module signatures; ptr is cast wherever it is printed with %lx. */
+    for (ptr = 0; ptr < allocated_chipmem - 40; ptr += 2, p += 2) {
+	/* Check for Mahoney & Kaktus */
+	/* Anyone got the format of old 15 Sample (SoundTracker)modules? */
+	if (ptr >= 0x438 && p[0] == 'M' && p[1] == '.' && p[2] == 'K' && p[3] == '.')
+	    foundmod (ptr - 0x438, "ProTracker (31 samples)");
+
+	if (ptr >= 0x438 && p[0] == 'F' && p[1] == 'L' && p[2] == 'T' && p[3] == '4')
+	    foundmod (ptr - 0x438, "Startrekker");
+
+	if (strncmp ((char *)p, "SMOD", 4) == 0) {
+	    printf ("Found possible FutureComposer 1.3 module at 0x%lx, length unknown.\n", (unsigned long) ptr);
+	}
+	if (strncmp ((char *)p, "FC14", 4) == 0) {
+	    printf ("Found possible FutureComposer 1.4 module at 0x%lx, length unknown.\n", (unsigned long) ptr);
+	}
+	if (p[0] == 0x48 && p[1] == 0xe7 && p[4] == 0x61 && p[5] == 0
+	    && p[8] == 0x4c && p[9] == 0xdf && p[12] == 0x4e && p[13] == 0x75
+	    && p[14] == 0x48 && p[15] == 0xe7 && p[18] == 0x61 && p[19] == 0
+	    && p[22] == 0x4c && p[23] == 0xdf && p[26] == 0x4e && p[27] == 0x75) {
+	    printf ("Found possible Whittaker module at 0x%lx, length unknown.\n", (unsigned long) ptr);
+	}
+	if (p[4] == 0x41 && p[5] == 0xFA) {
+	    int i;
+	    /* NOTE(review): the scans below read well beyond p + 40, the margin kept by the outer loop bound. */
+	    for (i = 0; i < 0x240; i += 2)
+		if (p[i] == 0xE7 && p[i + 1] == 0x42 && p[i + 2] == 0x41 && p[i + 3] == 0xFA)
+		    break;
+	    if (i < 0x240) {
+		uae_u8 *p2 = p + i + 4;
+		for (i = 0; i < 0x30; i += 2)
+		    if (p2[i] == 0xD1 && p2[i + 1] == 0xFA) {
+			printf ("Found possible MarkII module at %lx, length unknown.\n", (unsigned long) ptr);
+		    }
+	    }
+	}
+    }
+}
+
+static void dump_traps (void)
+{   /* Print each trap_labels entry: its address, its name and the long read from that address. */
+    int i;
+    for (i = 0; trap_labels[i].name; i++) {	/* the table ends at the first entry with a null name */
+	printf("$%02x: %s\t $%08x\n", trap_labels[i].adr,
+	       trap_labels[i].name, get_long (trap_labels[i].adr));
+    }
+}
+
+static void dump_ints (void)
+{   /* Print each int_labels entry: its address, its name and the long read from that address. */
+    int i;	
+    for (i = 0; int_labels[i].name; i++) {	/* the table ends at the first entry with a null name */
+	printf ("$%02x: %s\t $%08x\n", int_labels[i].adr,
+		int_labels[i].name, get_long (int_labels[i].adr));
+    }
+}
+
+static void disassemble_wait (FILE *file, unsigned long insn)
+{
+    uae_u8 vp,hp,ve,he,bfd,v_mask,h_mask;
+    /* Split the instruction into its position, enable-mask and BFD fields, then print the condition. */
+    vp = (insn & 0xff000000) >> 24;
+    hp = (insn & 0x00fe0000) >> 16;
+    ve = (insn & 0x00007f00) >> 8;
+    he = (insn & 0x000000fe);
+    bfd = (insn & 0x00008000) >> 15;	/* parenthesised: >> binds tighter than &, so bit 0 was read before */
+
+    /* bit15 can never be masked out*/
+    v_mask = vp & (ve | 0x80);
+    h_mask = hp & he;
+    if (v_mask > 0) {
+	fprintf (file, "vpos ");
+	if (ve != 0x7f) {
+	    fprintf (file, "& 0x%02x ", ve);
+	}
+	fprintf (file, ">= 0x%02x", v_mask);
+    }
+    if (he > 0) {
+	if (v_mask > 0) {
+	    fprintf (file," and");
+	}
+	fprintf (file, " hpos ");
+	if (he != 0xfe) {
+	    fprintf (file, "& 0x%02x ", he);
+	}
+	fprintf (file, ">= 0x%02x", h_mask);
+    } else {
+	fprintf (file, ", ignore horizontal");
+    }
+
+    fprintf (file, ".\n                        \t; VP %02x, VE %02x; HP %02x, HE %02x; BFD %d\n",
+	     vp, ve, hp, he, bfd);
+}
+
+/* simple decode copper by Mark Cox */
+static void decode_copper_insn (FILE* file, unsigned long insn, unsigned long addr)
+{
+    uae_u32 insn_type = insn & 0x00010001;	/* bit 16 and bit 0 select the case below */
+    int hpos, vpos;
+    char record[] = "          ";	/* 11 bytes: exactly fits " [xxx xxx]" when both values are <= 0xfff */
+    if (find_copper_record (addr, &hpos, &vpos)) {
+	sprintf (record, " [%03x %03x]", vpos, hpos);
+    }
+    /* Common prefix: address, the two instruction words and the optional position record. */
+    fprintf (file, "%08lx: %04lx %04lx%s\t; ", addr, insn >> 16, insn & 0xFFFF, record);
+
+    switch (insn_type) {
+    case 0x00010000: /* WAIT insn */
+	fprintf (file, "Wait for ");
+	disassemble_wait (file, insn);
+
+	if (insn == 0xfffffffe)
+	    fprintf (file, "                           \t; End of Copperlist\n");
+
+	break;
+
+    case 0x00010001: /* SKIP insn */
+	fprintf (file, "Skip if ");
+	disassemble_wait (file, insn);
+	break;
+
+    case 0x00000000:
+    case 0x00000001: /* MOVE insn */
+	fprintf (file, "%s := 0x%04lx\n",
+		 custd[(insn & 0x01fe0000) >> 17].name,
+		 insn & 0x0000ffff);
+
+	if ((insn & 0xfe000000) != 0) {
+	    fprintf (file, "                        \t;OCS Compatibility warning: bits 15-9 should be 0 for compatibility with OCS\n");
+	}
+	/* 01fe0000 register destination address
+	   fe000000 should be 0 for compatibility (at least in ocs
+	   0000ffff data to be put in register destination */
+	break;
+
+    default:	/* the four cases above cover every value of the two-bit mask */
+	abort ();
+    }
+
+}
+
+
+static uaecptr decode_copperlist (FILE* file, uaecptr address, int nolines)
+{
+    int remaining;
+    /* Deliberately no end-of-list detection: nice things are often hidden after
+     * the end, and which values mark the end of a copperlist is debatable.
+     * Decodes `nolines` long words (none if nolines <= 0); returns the next address. */
+    for (remaining = nolines; remaining > 0; remaining--) {
+	uae_u32 insn = get_long (address);
+	decode_copper_insn (file, insn, address);
+	address += 4;
+    }
+    return address;
+}
+
+
+/* cheat-search by Holger Jakob */
+static void cheatsearch (char **c)
+{
+    uae_u8 *p = get_real_address (0);
+    static uae_u32 *vlist = NULL;	/* candidate addresses kept between the first and second call */
+    uae_u32 ptr;
+    uae_u32 val = 0;
+    uae_u32 type = 0; /* not yet */
+    uae_u32 count = 0;
+    uae_u32 fcount = 0;
+    uae_u32 full = 0;
+    char nc;
+    /* First call: collect up to 255 matching addresses. Next call: re-check them against a new value and free the list. */
+    ignore_ws (c);
+    /* NOTE(review): the loop accepts hex digits but scales by 10 and only adds 0-9, so A-F contribute nothing. */
+    while (isxdigit (nc = **c)) {
+	(*c)++;
+	val *= 10;
+	nc = toupper (nc);
+	if (isdigit (nc)) {
+	    val += nc - '0';
+	}
+    }
+    if (vlist == NULL) {
+	vlist = malloc (256*4);
+	if (vlist != 0) {
+	    for (count = 0; count<255; count++)
+		vlist[count] = 0;
+	    count = 0;
+	    for (ptr = 0; ptr < allocated_chipmem - 40; ptr += 2, p += 2) {
+		if (ptr >= 0x438 && p[3] == (val & 0xff)
+		    && p[2] == (val >> 8 & 0xff)
+		    && p[1] == (val >> 16 & 0xff)
+		    && p[0] == (val >> 24 & 0xff))
+		{
+		    if (count < 255) {
+			vlist[count++]=ptr;
+			printf ("%08x: %x%x%x%x\n",ptr,p[0],p[1],p[2],p[3]);
+		    } else
+			full = 1;	/* set but not read again in this function */
+		}
+	    }
+	    printf ("Found %d possible addresses with %d\n",count,val);
+	    printf ("Now continue with 'g' and use 'C' with a different value\n");
+	}
+    } else {
+	for (count = 0; count<255; count++) {	/* NOTE(review): scans all 255 slots, including zero-filled unused ones */
+	    if (p[vlist[count]+3] == (val & 0xff)
+		&& p[vlist[count]+2] == (val>>8 & 0xff) 
+		&& p[vlist[count]+1] == (val>>16 & 0xff)
+		&& p[vlist[count]] == (val>>24 & 0xff))
+	    {
+		fcount++;
+		printf ("%08x: %x%x%x%x\n", vlist[count], p[vlist[count]],
+			p[vlist[count]+1], p[vlist[count]+2], p[vlist[count]+3]);
+	    }
+	}
+	printf ("%d hits of %d found\n",fcount,val);
+	free (vlist);
+	vlist = NULL;
+    }
+}
+
+static void writeintomem (char **c)
+{
+    uae_u8 *p = get_real_address (0);
+    uae_u32 addr = 0;
+    uae_u32 val = 0;
+    char nc;
+    /* Parse "<hex address> <value>" and store the value as four bytes, most significant first. */
+    ignore_ws(c);
+    while (isxdigit((unsigned char) (nc = **c))) {	/* casts: <ctype.h> needs unsigned char values */
+	(*c)++;
+	addr *= 16;
+	nc = toupper((unsigned char) nc);
+	if (isdigit((unsigned char) nc)) {
+	    addr += nc - '0';
+	} else {
+	    addr += nc - 'A' + 10;
+	}
+    }
+    ignore_ws(c);
+    while (isxdigit((unsigned char) (nc = **c))) {	/* NOTE(review): accepts hex digits but only 0-9 are added, scaled by 10 */
+	(*c)++;
+	val *= 10;
+	nc = toupper((unsigned char) nc);
+	if (isdigit((unsigned char) nc)) {
+	    val += nc - '0';
+	}
+    }
+    /* All four bytes must lie inside chip memory; the old test only covered the first one. */
+    if (allocated_chipmem >= 4 && addr <= allocated_chipmem - 4) {
+      p[addr] = val>>24 & 0xff;
+      p[addr+1] = val>>16 & 0xff;
+      p[addr+2] = val>>8 & 0xff;
+      p[addr+3] = val & 0xff;
+      printf ("Wrote %d at %08x\n",val,addr);
+    } else
+      printf ("Invalid address %08x\n",addr);
+}
+
+static void show_exec_tasks (void)
+{   /* Print the current task, then the lists reached through execbase + 406 and execbase + 420. */
+    uaecptr execbase = get_long (4);
+    uaecptr taskready = get_long (execbase + 406);
+    uaecptr taskwait = get_long (execbase + 420);
+    uaecptr node, end;	/* `end` is assigned below but never compared */
+    printf ("execbase at 0x%08lx\n", (unsigned long) execbase);
+    printf ("Current:\n");
+    node = get_long (execbase + 276);
+    printf ("%08lx: %08lx %s\n", (unsigned long) node, 0UL, (char *) get_real_address (get_long (node + 10)));
+    printf ("Ready:\n");
+    node = get_long (taskready);
+    end = get_long (taskready + 4);
+    while (node) {	/* follow the link in each node's first long until it is 0 */
+	printf ("%08lx: %08lx %s\n", (unsigned long) node, 0UL, (char *) get_real_address (get_long (node + 10)));
+	node = get_long (node);
+    }
+    printf ("Waiting:\n");
+    node = get_long (taskwait);
+    end = get_long (taskwait + 4);
+    while (node) {
+	printf ("%08lx: %08lx %s\n", (unsigned long) node, 0UL, (char *) get_real_address (get_long (node + 10)));
+	node = get_long (node);
+    }
+}
+
+static int trace_same_insn_count;
+static uae_u8 trace_insn_copy[10];
+static struct regstruct trace_prev_regs;
+void debug (void)
+{
+    char input[80];
+    uaecptr nextpc,nxdis,nxmem,nxcopper;
+    /* Debugger entry point: handles trace and skip modes first, then runs the interactive command loop. */
+    bogusframe = 1;
+    /* Trace mode ("f C0DEDBAD"): log the CPU state, but only if the trace file was actually opened. */
+    if (do_skip && skipaddr == 0xC0DEDBAD && logfile != NULL) {
+#if 0
+	if (trace_same_insn_count > 0) {
+	    if (memcmp (trace_insn_copy, regs.pc_p, 10) == 0
+		&& memcmp (trace_prev_regs.regs, regs.regs, sizeof regs.regs) == 0)
+	    {
+		trace_same_insn_count++;
+		return;
+	    }
+	}
+	if (trace_same_insn_count > 1)
+	    fprintf (logfile, "[ repeated %d times ]\n", trace_same_insn_count);
+#endif
+	m68k_dumpstate (logfile, &nextpc);
+	trace_same_insn_count = 1;
+	memcpy (trace_insn_copy, regs.pc_p, 10);
+	memcpy (&trace_prev_regs, &regs, sizeof regs);
+    }
+    /* Skip mode: keep running, re-arming the break flag, until the PC reaches skipaddr. */
+    if (do_skip && (m68k_getpc() != skipaddr/* || regs.a[0] != 0x1e558*/)) {
+	set_special (SPCFLAG_BRK);
+	return;
+    }
+    do_skip = 0;
+
+#ifdef NEED_TO_DEBUG_BADLY
+    history[lasthist] = regs;
+    historyf[lasthist] = regflags;
+#else
+    history[lasthist] = m68k_getpc();
+#endif
+    if (++lasthist == MAX_HIST) lasthist = 0;	/* history is a ring buffer of MAX_HIST entries */
+    if (lasthist == firsthist) {
+	if (++firsthist == MAX_HIST) firsthist = 0;
+    }
+
+    m68k_dumpstate (stdout, &nextpc);
+    nxdis = nextpc; nxmem = nxcopper = 0;
+
+    for (;;) {
+	char cmd, *inptr;
+
+	printf (">");
+	fflush (stdout);
+	if (fgets (input, 80, stdin) == 0)
+	    return;
+	inptr = input;
+	cmd = next_char (&inptr);
+	switch (cmd) {
+	case 'c': dumpcia (); dumpdisk (); dumpcustom (); break;
+	case 'i': dump_ints (); break;
+	case 'e': dump_traps (); break;
+	case 'r': m68k_dumpstate (stdout, &nextpc); break;
+	case 'M': modulesearch (); break;
+	case 'C': cheatsearch (&inptr); break;
+	case 'W': writeintomem (&inptr); break;
+	case 'S':
+	{
+	    uae_u8 *memp;
+	    uae_u32 src, len;
+	    char *name;
+	    FILE *fp;
+
+	    if (!more_params (&inptr))
+		goto S_argh;
+
+	    name = inptr;
+	    while (*inptr != '\0' && !isspace ((unsigned char) *inptr))
+		inptr++;
+	    if (!isspace ((unsigned char) *inptr))
+		goto S_argh;
+
+	    *inptr = '\0';
+	    inptr++;
+	    if (!more_params (&inptr))
+		goto S_argh;
+	    src = readhex (&inptr);
+	    if (!more_params (&inptr))
+		goto S_argh;
+	    len = readhex (&inptr);
+	    if (! valid_address (src, len)) {
+		printf ("Invalid memory block\n");
+		break;
+	    }
+	    memp = get_real_address (src);
+	    fp = fopen (name, "w");
+	    if (fp == NULL) {
+		printf ("Couldn't open file\n");
+		break;
+	    }
+	    if (fwrite (memp, 1, len, fp) != len) {
+		printf ("Error writing file\n");
+	    }
+	    fclose (fp);
+	    break;
+
+	  S_argh:
+	    printf ("S command needs more arguments!\n");
+	    break;
+	}
+	case 'd':
+	{
+	    uae_u32 daddr;
+	    int count;
+
+	    if (more_params(&inptr))
+		daddr = readhex(&inptr);
+	    else
+		daddr = nxdis;
+	    if (more_params(&inptr))
+		count = readhex(&inptr);
+	    else
+		count = 10;
+	    m68k_disasm (stdout, daddr, &nxdis, count);
+	}
+	break;
+	case 'T': show_exec_tasks (); break;
+	case 't': set_special (SPCFLAG_BRK); return;
+	case 'z':
+	    skipaddr = nextpc;
+	    do_skip = 1;
+	    set_special (SPCFLAG_BRK);
+	    return;
+
+	case 'f':
+	    skipaddr = readhex (&inptr);
+	    do_skip = 1;
+	    set_special (SPCFLAG_BRK);
+	    if (skipaddr == 0xC0DEDBAD) {
+	        trace_same_insn_count = 0;
+		logfile = fopen ("uae.trace", "w");	/* may be NULL; the trace block at the top checks for that */
+		memcpy (trace_insn_copy, regs.pc_p, 10);
+		memcpy (&trace_prev_regs, &regs, sizeof regs);
+	    }
+	    return;
+
+	case 'q': uae_quit();
+	    debugger_active = 0;
+	    debugging = 0;
+	    return;
+
+	case 'g':
+	    if (more_params (&inptr))
+		m68k_setpc (readhex (&inptr));
+	    fill_prefetch_0 ();
+	    debugger_active = 0;
+	    debugging = 0;
+	    return;
+
+	case 'H':
+	{
+	    int count;
+	    int temp;
+#ifdef NEED_TO_DEBUG_BADLY
+	    struct regstruct save_regs = regs;
+	    union flagu save_flags = regflags;
+#endif
+
+	    if (more_params(&inptr))
+		count = readhex(&inptr);
+	    else
+		count = 10;
+	    if (count < 0)
+		break;
+	    temp = lasthist;
+	    while (count-- > 0 && temp != firsthist) {
+		if (temp == 0) temp = MAX_HIST-1; else temp--;
+	    }
+	    while (temp != lasthist) {
+#ifdef NEED_TO_DEBUG_BADLY
+		regs = history[temp];
+		regflags = historyf[temp];
+		m68k_dumpstate (NULL);
+#else
+		m68k_disasm (stdout, history[temp], NULL, 1);
+#endif
+		if (++temp == MAX_HIST) temp = 0;
+	    }
+#ifdef NEED_TO_DEBUG_BADLY
+	    regs = save_regs;
+	    regflags = save_flags;
+#endif
+	}
+	break;
+	case 'm':
+	{
+	    uae_u32 maddr; int lines;
+	    if (more_params(&inptr))
+		maddr = readhex(&inptr);
+	    else
+		maddr = nxmem;
+	    if (more_params(&inptr))
+		lines = readhex(&inptr);
+	    else
+		lines = 16;
+	    dumpmem(maddr, &nxmem, lines);
+	}
+	break;
+	case 'o':
+	{
+	    uae_u32 maddr;
+	    int lines;
+
+	    if (more_params(&inptr)) {
+		maddr = readhex(&inptr);
+		if (maddr == 1 || maddr == 2)
+		    maddr = get_copper_address (maddr);
+	    }
+	    else
+		maddr = nxcopper;
+
+	    if (more_params (&inptr))
+		lines = readhex (&inptr);
+	    else
+		lines = 10;
+
+	    nxcopper = decode_copperlist (stdout, maddr, lines);
+	    break;
+	}
+	case 'O':
+	    if (more_params (&inptr)) {
+		int plane = readint (&inptr);
+		int offs = readint (&inptr);
+		if (plane >= 0 && plane < 8)
+		    bpl_off[plane] = offs;
+	    } else {
+		int i;
+		for (i = 0; i < 8; i++)
+		    printf ("Plane %d offset %d\n", i, bpl_off[i]);
+	    }
+	    break;
+	case 'u':
+	    mmu_dump_tables();
+	    break;
+	case 'h':
+	case '?':
+	{
+	    printf ("          HELP for UAE Debugger\n");
+	    printf ("         -----------------------\n\n");
+	    printf ("  g: <address>          Start execution at the current address or <address>\n");
+	    printf ("  c:                    Dump state of the CIA and custom chips\n");
+	    printf ("  r:                    Dump state of the CPU\n");
+	    printf ("  m <address> <lines>:  Memory dump starting at <address>\n");
+	    printf ("  d <address> <lines>:  Disassembly starting at <address>\n");
+	    printf ("  t:                    Step one instruction\n");
+	    printf ("  z:                    Step through one instruction - useful for JSR, DBRA etc\n");
+	    printf ("  f <address>:          Step forward until PC == <address>\n");
+	    printf ("  i:                    Dump contents of interrupt registers\n");
+	    printf ("  e:                    Dump contents of trap vectors\n");
+	    printf ("  o <1|2|addr> <lines>: View memory as Copper Instructions\n");
+	    printf ("  O:                    Display bitplane offsets\n");
+	    printf ("  O <plane> <offset>:   Offset a bitplane\n");
+	    printf ("  H <count>:            Show PC history <count> instructions\n");
+	    printf ("  M:                    Search for *Tracker sound modules\n");
+	    printf ("  C <value>:            Search for values like energy or lifes in games\n");
+	    printf ("  W <address> <value>:  Write into Amiga memory\n");
+	    printf ("  S <file> <addr> <n>:  Save a block of Amiga memory\n");
+	    printf ("  T:                    Show exec tasks and their PCs\n");
+	    printf ("  u:                    Dump the MMU translation tables and state\n");
+	    printf ("  h,?:                  Show this help page\n");
+	    printf ("  q:                    Quit the emulator. You don't want to use this command.\n\n");
+	}
+	break;
+	}
+    }
+}
diff -urN src-0.8.22/src/ersatz.c src-0.8.22-mmu/src/ersatz.c
--- src-0.8.22/src/ersatz.c	2001-12-17 19:38:37.000000000 +0100
+++ src-0.8.22-mmu/src/ersatz.c	2003-07-25 12:11:11.000000000 +0200
@@ -109,55 +109,55 @@
     regs.s = 0;
     /* Set some interrupt vectors */
     for (a = 8; a < 0xC0; a += 4) {
-	put_long (a, 0xF8001A);
+	phys_put_long (a, 0xF8001A);
     }
     regs.isp = regs.msp = regs.usp = 0x800;
     m68k_areg(regs, 7) = 0x80000;
     regs.intmask = 0;
 
     /* Build a dummy execbase */
-    put_long (4, m68k_areg(regs, 6) = 0x676);
-    put_byte (0x676 + 0x129, 0);
+    phys_put_long (4, m68k_areg(regs, 6) = 0x676);
+    phys_put_byte (0x676 + 0x129, 0);
     for (f = 1; f < 105; f++) {
-	put_word (0x676 - 6*f, 0x4EF9);
-	put_long (0x676 - 6*f + 2, 0xF8000C);
+	phys_put_word (0x676 - 6*f, 0x4EF9);
+	phys_put_long (0x676 - 6*f + 2, 0xF8000C);
     }
     /* Some "supported" functions */
-    put_long (0x676 - 456 + 2, 0xF80014);
-    put_long (0x676 - 216 + 2, 0xF80020);
-    put_long (0x676 - 198 + 2, 0xF80026);
-    put_long (0x676 - 204 + 2, 0xF8002c);
-    put_long (0x676 - 210 + 2, 0xF8002a);
+    phys_put_long (0x676 - 456 + 2, 0xF80014);
+    phys_put_long (0x676 - 216 + 2, 0xF80020);
+    phys_put_long (0x676 - 198 + 2, 0xF80026);
+    phys_put_long (0x676 - 204 + 2, 0xF8002c);
+    phys_put_long (0x676 - 210 + 2, 0xF8002a);
 
     /* Build an IORequest */
     request = 0x800;
-    put_word (request + 0x1C, 2);
-    put_long (request + 0x28, 0x4000);
-    put_long (request + 0x2C, 0);
-    put_long (request + 0x24, 0x200 * 4);
+    phys_put_word (request + 0x1C, 2);
+    phys_put_long (request + 0x28, 0x4000);
+    phys_put_long (request + 0x2C, 0);
+    phys_put_long (request + 0x24, 0x200 * 4);
     m68k_areg(regs, 1) = request;
     ersatz_doio ();
     /* kickstart disk loader */
-    if (get_long(0x4000) == 0x4b49434b) {
+    if (phys_get_long(0x4000) == 0x4b49434b) {
 	/* a kickstart disk was found in drive 0! */
 	write_log ("Loading Kickstart rom image from Kickstart disk\n");
 	/* print some notes... */
 	write_log ("NOTE: if UAE crashes set CPU to 68000 and/or chipmem size to 512KB!\n");
 
 	/* read rom image from kickstart disk */
-	put_word (request + 0x1C, 2);
-	put_long (request + 0x28, 0xF80000);
-	put_long (request + 0x2C, 0x200);
-	put_long (request + 0x24, 0x200 * 512);
+	phys_put_word (request + 0x1C, 2);
+	phys_put_long (request + 0x28, 0xF80000);
+	phys_put_long (request + 0x2C, 0x200);
+	phys_put_long (request + 0x24, 0x200 * 512);
 	m68k_areg(regs, 1) = request;
 	ersatz_doio ();
 
 	/* read rom image once again to mirror address space.
 	   not elegant, but it works... */
-	put_word (request + 0x1C, 2);
-	put_long (request + 0x28, 0xFC0000);
-	put_long (request + 0x2C, 0x200);
-	put_long (request + 0x24, 0x200 * 512);
+	phys_put_word (request + 0x1C, 2);
+	phys_put_long (request + 0x28, 0xFC0000);
+	phys_put_long (request + 0x2C, 0x200);
+	phys_put_long (request + 0x24, 0x200 * 512);
 	m68k_areg(regs, 1) = request;
 	ersatz_doio ();
 
@@ -174,25 +174,25 @@
     fill_prefetch_0 ();
 
     /* Init the hardware */
-    put_long (0x3000, 0xFFFFFFFEul);
-    put_long (0xDFF080, 0x3000);
-    put_word (0xDFF088, 0);
-    put_word (0xDFF096, 0xE390);
-    put_word (0xDFF09A, 0xE02C);
-    put_word (0xDFF09E, 0x0000);
-    put_word (0xDFF092, 0x0038);
-    put_word (0xDFF094, 0x00D0);
-    put_word (0xDFF08E, 0x2C81);
-    put_word (0xDFF090, 0xF4C1);
-    put_word (0xDFF02A, 0x8000);
-
-    put_byte (0xBFD100, 0xF7);
-    put_byte (0xBFEE01, 0);
-    put_byte (0xBFEF01, 0x08);
-    put_byte (0xBFDE00, 0x04);
-    put_byte (0xBFDF00, 0x84);
-    put_byte (0xBFDD00, 0x9F);
-    put_byte (0xBFED01, 0x9F);
+    phys_put_long (0x3000, 0xFFFFFFFEul);
+    phys_put_long (0xDFF080, 0x3000);
+    phys_put_word (0xDFF088, 0);
+    phys_put_word (0xDFF096, 0xE390);
+    phys_put_word (0xDFF09A, 0xE02C);
+    phys_put_word (0xDFF09E, 0x0000);
+    phys_put_word (0xDFF092, 0x0038);
+    phys_put_word (0xDFF094, 0x00D0);
+    phys_put_word (0xDFF08E, 0x2C81);
+    phys_put_word (0xDFF090, 0xF4C1);
+    phys_put_word (0xDFF02A, 0x8000);
+
+    phys_put_byte (0xBFD100, 0xF7);
+    phys_put_byte (0xBFEE01, 0);
+    phys_put_byte (0xBFEF01, 0x08);
+    phys_put_byte (0xBFDE00, 0x04);
+    phys_put_byte (0xBFDF00, 0x84);
+    phys_put_byte (0xBFDD00, 0x9F);
+    phys_put_byte (0xBFED01, 0x9F);
 }
 
 void ersatz_perform (uae_u16 what)
@@ -204,7 +204,7 @@
 
      case EOP_SERVEINT:
 	/* Just reset all the interrupt request bits */
-	put_word (0xDFF09C, get_word (0xDFF01E) & 0x3FFF);
+	phys_put_word (0xDFF09C, phys_get_word (0xDFF01E) & 0x3FFF);
 	break;
 
      case EOP_DOIO:
diff -urN src-0.8.22/src/expansion.c src-0.8.22-mmu/src/expansion.c
--- src-0.8.22/src/expansion.c	2001-12-30 16:31:03.000000000 +0100
+++ src-0.8.22-mmu/src/expansion.c	2003-07-25 12:11:11.000000000 +0200
@@ -19,6 +19,8 @@
 #include "autoconf.h"
 #include "picasso96.h"
 #include "savestate.h"
+#include "custom.h"
+#include "newcpu.h"
 
 #define MAX_EXPANSION_BOARDS	8
 
@@ -843,9 +845,9 @@
     /* check if Kickstart version is below 1.3 */
     if (! ersatzkickfile
 	&& (/* Kickstart 1.0 & 1.1! */
-	    get_word (0xF8000C) == 0xFFFF
+	    phys_get_word (0xF8000C) == 0xFFFF
 	    /* Kickstart < 1.3 */
-	    || get_word (0xF8000C) < 34))
+	    || phys_get_word (0xF8000C) < 34))
     {
 	/* warn user */
 	write_log ("Kickstart version is below 1.3!  Disabling autoconfig devices.\n");
diff -urN src-0.8.22/src/gencpu.c src-0.8.22-mmu/src/gencpu.c
--- src-0.8.22/src/gencpu.c	2001-12-17 19:38:37.000000000 +0100
+++ src-0.8.22-mmu/src/gencpu.c	2003-07-25 12:24:59.000000000 +0200
@@ -16,6 +16,7 @@
  * The source for the insn timings is Markt & Technik's Amiga Magazin 8/1992.
  *
  * Copyright 1995, 1996, 1997, 1998, 1999, 2000 Bernd Schmidt
+ * vim:ts=8:sw=4:
  */
 
 #include "sysconfig.h"
@@ -25,12 +26,14 @@
 #include "readcpu.h"
 
 #define BOOL_TYPE "int"
+#define VERIFY_MMU_GENAMODE	0
 
 static FILE *headerfile;
 static FILE *stblfile;
 
 static int using_prefetch;
 static int using_exception_3;
+static int using_mmu;
 static int cpu_level;
 
 /* For the current opcode, the next lower level that will have different code.
@@ -43,6 +46,20 @@
 static int *opcode_last_postfix;
 static unsigned long *counts;
 
+/* genamode() arguments: GENA_GETV_* is passed as getv, GENA_MOVEM_* as movem. */
+#define GENA_GETV_NO_FETCH	0
+#define GENA_GETV_FETCH		1
+#define GENA_GETV_FETCH_ALIGN 2
+#define GENA_MOVEM_DO_INC	0
+#define GENA_MOVEM_NO_INC	1
+#define GENA_MOVEM_MOVE16	2
+/* Translation selectors; each value is an index into mem_prefix[] below. */
+#define XLATE_LOG	0
+#define XLATE_PHYS	1
+#define XLATE_SFC	2
+#define XLATE_DFC	3
+static char * mem_prefix[4] = { "", "phys_", "sfc_", "dfc_" };
+
 static void read_counts (void)
 {
     FILE *file;
@@ -133,11 +150,16 @@
     return 0;
 }
 
+int nexti_no_inc = 0;
+
+
 static const char *gen_nextilong (void)
 {
     static char buffer[80];
     int r = m68k_pc_offset;
-    m68k_pc_offset += 4;
+
+    if (!nexti_no_inc)
+        m68k_pc_offset += 4;
 
     insn_n_cycles += 8;
 
@@ -152,7 +174,9 @@
 {
     static char buffer[80];
     int r = m68k_pc_offset;
-    m68k_pc_offset += 2;
+	
+    if (!nexti_no_inc)
+        m68k_pc_offset += 2;
 
     insn_n_cycles += 4;
 
@@ -212,14 +236,19 @@
 /* getv == 1: fetch data; getv != 0: check for odd address. If movem != 0,
  * the calling routine handles Apdi and Aipi modes.
  * gb-- movem == 2 means the same thing but for a MOVE16 instruction */
-static void genamode (amodes mode, char *reg, wordsizes size, char *name, int getv, int movem)
+static void genamode (amodes mode, char *reg, wordsizes size, char *name, int getv, int movem, int xlateflag)
+
 {
+    /* Without MMU emulation, always select the "phys_" accessor prefix. */
+    if (!using_mmu)
+        xlateflag = XLATE_PHYS;
+	
     start_brace ();
     switch (mode) {
     case Dreg:
 	if (movem)
 	    abort ();
-	if (getv == 1)
+	if (getv == GENA_GETV_FETCH)
 	    switch (size) {
 	    case sz_byte:
 #if defined(AMIGA) && !defined(WARPUP)
@@ -247,7 +276,7 @@
     case Areg:
 	if (movem)
 	    abort ();
-	if (getv == 1)
+	if (getv == GENA_GETV_FETCH)
 	    switch (size) {
 	    case sz_word:
 		printf ("\tuae_s16 %s = m68k_areg(regs, %s);\n", name, reg);
@@ -329,7 +358,7 @@
 	printf ("\tuaecptr %sa = %s;\n", name, gen_nextilong ());
 	break;
     case imm:
-	if (getv != 1)
+	if (getv != GENA_GETV_FETCH)
 	    abort ();
 	switch (size) {
 	case sz_byte:
@@ -346,22 +375,22 @@
 	}
 	return;
     case imm0:
-	if (getv != 1)
+	if (getv != GENA_GETV_FETCH)
 	    abort ();
 	printf ("\tuae_s8 %s = %s;\n", name, gen_nextibyte ());
 	return;
     case imm1:
-	if (getv != 1)
+	if (getv != GENA_GETV_FETCH)
 	    abort ();
 	printf ("\tuae_s16 %s = %s;\n", name, gen_nextiword ());
 	return;
     case imm2:
-	if (getv != 1)
+	if (getv != GENA_GETV_FETCH)
 	    abort ();
 	printf ("\tuae_s32 %s = %s;\n", name, gen_nextilong ());
 	return;
     case immi:
-	if (getv != 1)
+	if (getv != GENA_GETV_FETCH)
 	    abort ();
 	printf ("\tuae_u32 %s = %s;\n", name, reg);
 	return;
@@ -372,7 +401,7 @@
     /* We get here for all non-reg non-immediate addressing modes to
      * actually fetch the value. */
 
-    if (using_exception_3 && getv != 0 && size != sz_byte) {	    
+    if (using_exception_3 && getv != GENA_GETV_NO_FETCH && size != sz_byte) {	    
 	printf ("\tif ((%sa & 1) != 0) {\n", name);
 	printf ("\t\tlast_fault_for_exception_3 = %sa;\n", name);
 	printf ("\t\tlast_op_for_exception_3 = opcode;\n");
@@ -384,7 +413,7 @@
 	start_brace ();
     }
 
-    if (getv == 1) {
+    if (getv == GENA_GETV_FETCH) {
 	switch (size) {
 	case sz_byte: insn_n_cycles += 4; break;
 	case sz_word: insn_n_cycles += 4; break;
@@ -393,9 +422,9 @@
 	}
 	start_brace ();
 	switch (size) {
-	case sz_byte: printf ("\tuae_s8 %s = get_byte(%sa);\n", name, name); break;
-	case sz_word: printf ("\tuae_s16 %s = get_word(%sa);\n", name, name); break;
-	case sz_long: printf ("\tuae_s32 %s = get_long(%sa);\n", name, name); break;
+	case sz_byte: printf ("\tuae_s8 %s = %sget_byte(%sa);\n", name, mem_prefix[xlateflag], name); break;
+	case sz_word: printf ("\tuae_s16 %s = %sget_word(%sa);\n", name, mem_prefix[xlateflag], name); break;
+	case sz_long: printf ("\tuae_s32 %s = %sget_long(%sa);\n", name, mem_prefix[xlateflag], name); break;
 	default: abort ();
 	}
     }
@@ -427,7 +456,7 @@
 	}
 }
 
-static void genastore (char *from, amodes mode, char *reg, wordsizes size, char *to)
+static void genastore (char *from, amodes mode, char *reg, wordsizes size, char *to, int xlateflag)
 {
     switch (mode) {
      case Dreg:
@@ -472,19 +501,19 @@
 	switch (size) {
 	 case sz_byte:
 	    insn_n_cycles += 4;
-	    printf ("\tput_byte(%sa,%s);\n", to, from);
+	    printf ("\t%sput_byte(%sa,%s);\n", mem_prefix[xlateflag], to, from);
 	    break;
 	 case sz_word:
 	    insn_n_cycles += 4;
 	    if (cpu_level < 2 && (mode == PC16 || mode == PC8r))
 		abort ();
-	    printf ("\tput_word(%sa,%s);\n", to, from);
+	    printf ("\t%sput_word(%sa,%s);\n", mem_prefix[xlateflag], to, from);
 	    break;
 	 case sz_long:
 	    insn_n_cycles += 8;
 	    if (cpu_level < 2 && (mode == PC16 || mode == PC8r))
 		abort ();
-	    printf ("\tput_long(%sa,%s);\n", to, from);
+	    printf ("\t%sput_long(%sa,%s);\n", mem_prefix[xlateflag], to, from);
 	    break;
 	 default:
 	    abort ();
@@ -506,16 +535,20 @@
 {
     char getcode[100];
     int size = table68k[opcode].size == sz_long ? 4 : 2;
+    int xlateflag = using_mmu ? XLATE_LOG : XLATE_PHYS;
 
     if (table68k[opcode].size == sz_long) {
-	strcpy (getcode, "get_long(srca)");
+        strcpy (getcode, mem_prefix[xlateflag]);
+	strcat (getcode, "get_long(srca)");
     } else {
-	strcpy (getcode, "(uae_s32)(uae_s16)get_word(srca)");
+        strcpy (getcode, "(uae_s32)(uae_s16)");
+	strcat (getcode, mem_prefix[xlateflag]);
+	strcat (getcode, "get_word(srca)");
     }
 
     printf ("\tuae_u16 mask = %s;\n", gen_nextiword ());
     printf ("\tunsigned int dmask = mask & 0xff, amask = (mask >> 8) & 0xff;\n");
-    genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, 1);
+    genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_NO_INC, xlateflag);
     start_brace ();
     printf ("\twhile (dmask) { m68k_dreg(regs, movem_index1[dmask]) = %s; srca += %d; dmask = movem_next[dmask]; }\n",
 	    getcode, size);
@@ -530,14 +563,19 @@
 {
     char putcode[100];
     int size = table68k[opcode].size == sz_long ? 4 : 2;
+    int xlateflag = using_mmu ? XLATE_LOG : XLATE_PHYS;
+
+    /* Accessor prefix goes in first; the sized put call is appended below. */
+    strcpy (putcode, mem_prefix[xlateflag]);
     if (table68k[opcode].size == sz_long) {
-	strcpy (putcode, "put_long(srca,");
+	strcat (putcode, "put_long(srca,");
     } else {
-	strcpy (putcode, "put_word(srca,");
+	strcat (putcode, "put_word(srca,");
     }
 
     printf ("\tuae_u16 mask = %s;\n", gen_nextiword ());
-    genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src", 2, 1);
+    genamode (table68k[opcode].dmode, "dstreg", table68k[opcode].size, "src",
+			GENA_GETV_FETCH_ALIGN, GENA_MOVEM_NO_INC, XLATE_LOG);
     if (using_prefetch)
 	sync_m68k_pc ();
 
@@ -825,6 +863,7 @@
 static void gen_opcode (unsigned long int opcode)
 {
     struct instr *curi = table68k + opcode;
+    int xlateflag = using_mmu ? XLATE_LOG : XLATE_PHYS;
     insn_n_cycles = 4;
 
     start_brace ();
@@ -859,16 +898,16 @@
     case i_OR:
     case i_AND:
     case i_EOR:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	printf ("\tsrc %c= dst;\n", curi->mnemo == i_OR ? '|' : curi->mnemo == i_AND ? '&' : '^');
 	genflags (flag_logical, curi->size, "src", "", "");
-	genastore ("src", curi->dmode, "dstreg", curi->size, "dst");
+	genastore ("src", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
 	break;
     case i_ORSR:
     case i_EORSR:
 	printf ("\tMakeSR();\n");
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	if (curi->size == sz_byte) {
 	    printf ("\tsrc &= 0xFF;\n");
 	}
@@ -877,7 +916,7 @@
 	break;
     case i_ANDSR:
 	printf ("\tMakeSR();\n");
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	if (curi->size == sz_byte) {
 	    printf ("\tsrc |= 0xFF00;\n");
 	}
@@ -885,31 +924,31 @@
 	printf ("\tMakeFromSR();\n");
 	break;
     case i_SUB:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	genflags (flag_sub, curi->size, "newv", "src", "dst");
-	genastore ("newv", curi->dmode, "dstreg", curi->size, "dst");
+	genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
 	break;
     case i_SUBA:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	printf ("\tuae_u32 newv = dst - src;\n");
-	genastore ("newv", curi->dmode, "dstreg", sz_long, "dst");
+	genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", xlateflag);
 	break;
     case i_SUBX:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	printf ("\tuae_u32 newv = dst - src - (GET_XFLG ? 1 : 0);\n");
 	genflags (flag_subx, curi->size, "newv", "src", "dst");
 	genflags (flag_zn, curi->size, "newv", "", "");
-	genastore ("newv", curi->dmode, "dstreg", curi->size, "dst");
+	genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
 	break;
     case i_SBCD:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	printf ("\tuae_u16 newv_lo = (dst & 0xF) - (src & 0xF) - (GET_XFLG ? 1 : 0);\n");
 	printf ("\tuae_u16 newv_hi = (dst & 0xF0) - (src & 0xF0);\n");
@@ -922,34 +961,34 @@
 	duplicate_carry ();
 	genflags (flag_zn, curi->size, "newv", "", "");
 	printf ("\tSET_VFLG ((tmp_newv & 0x80) != 0 && (newv & 0x80) == 0);\n");
-	genastore ("newv", curi->dmode, "dstreg", curi->size, "dst");
+	genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
 	break;
     case i_ADD:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	genflags (flag_add, curi->size, "newv", "src", "dst");
-	genastore ("newv", curi->dmode, "dstreg", curi->size, "dst");
+	genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
 	break;
     case i_ADDA:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	printf ("\tuae_u32 newv = dst + src;\n");
-	genastore ("newv", curi->dmode, "dstreg", sz_long, "dst");
+	genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", xlateflag);
 	break;
     case i_ADDX:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	printf ("\tuae_u32 newv = dst + src + (GET_XFLG ? 1 : 0);\n");
 	genflags (flag_addx, curi->size, "newv", "src", "dst");
 	genflags (flag_zn, curi->size, "newv", "", "");
-	genastore ("newv", curi->dmode, "dstreg", curi->size, "dst");
+	genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
 	break;
     case i_ABCD:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	printf ("\tuae_u16 newv_lo = (src & 0xF) + (dst & 0xF) + (GET_XFLG ? 1 : 0);\n");
 	printf ("\tuae_u16 newv_hi = (src & 0xF0) + (dst & 0xF0);\n");
@@ -963,24 +1002,24 @@
 	duplicate_carry ();
 	genflags (flag_zn, curi->size, "newv", "", "");
 	printf ("\tSET_VFLG ((tmp_newv & 0x80) == 0 && (newv & 0x80) != 0);\n");
-	genastore ("newv", curi->dmode, "dstreg", curi->size, "dst");
+	genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
 	break;
     case i_NEG:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	genflags (flag_sub, curi->size, "dst", "src", "0");
-	genastore ("dst", curi->smode, "srcreg", curi->size, "src");
+	genastore ("dst", curi->smode, "srcreg", curi->size, "src", xlateflag);
 	break;
     case i_NEGX:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	printf ("\tuae_u32 newv = 0 - src - (GET_XFLG ? 1 : 0);\n");
 	genflags (flag_subx, curi->size, "newv", "src", "0");
 	genflags (flag_zn, curi->size, "newv", "", "");
-	genastore ("newv", curi->smode, "srcreg", curi->size, "src");
+	genastore ("newv", curi->smode, "srcreg", curi->size, "src", xlateflag);
 	break;
     case i_NBCD:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	printf ("\tuae_u16 newv_lo = - (src & 0xF) - (GET_XFLG ? 1 : 0);\n");
 	printf ("\tuae_u16 newv_hi = - (src & 0xF0);\n");
@@ -993,27 +1032,27 @@
 	printf ("\tSET_CFLG (cflg);\n");
 	duplicate_carry();
 	genflags (flag_zn, curi->size, "newv", "", "");
-	genastore ("newv", curi->smode, "srcreg", curi->size, "src");
+	genastore ("newv", curi->smode, "srcreg", curi->size, "src", xlateflag);
 	break;
     case i_CLR:
-	genamode (curi->smode, "srcreg", curi->size, "src", 2, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
 	genflags (flag_logical, curi->size, "0", "", "");
-	genastore ("0", curi->smode, "srcreg", curi->size, "src");
+	genastore ("0", curi->smode, "srcreg", curi->size, "src", xlateflag);
 	break;
     case i_NOT:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	printf ("\tuae_u32 dst = ~src;\n");
 	genflags (flag_logical, curi->size, "dst", "", "");
-	genastore ("dst", curi->smode, "srcreg", curi->size, "src");
+	genastore ("dst", curi->smode, "srcreg", curi->size, "src", xlateflag);
 	break;
     case i_TST:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	genflags (flag_logical, curi->size, "src", "", "");
 	break;
     case i_BTST:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	if (curi->size == sz_byte)
 	    printf ("\tsrc &= 7;\n");
 	else
@@ -1021,55 +1060,55 @@
 	printf ("\tSET_ZFLG (1 ^ ((dst >> src) & 1));\n");
 	break;
     case i_BCHG:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	if (curi->size == sz_byte)
 	    printf ("\tsrc &= 7;\n");
 	else
 	    printf ("\tsrc &= 31;\n");
 	printf ("\tdst ^= (1 << src);\n");
 	printf ("\tSET_ZFLG (((uae_u32)dst & (1 << src)) >> src);\n");
-	genastore ("dst", curi->dmode, "dstreg", curi->size, "dst");
+	genastore ("dst", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
 	break;
     case i_BCLR:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	if (curi->size == sz_byte)
 	    printf ("\tsrc &= 7;\n");
 	else
 	    printf ("\tsrc &= 31;\n");
 	printf ("\tSET_ZFLG (1 ^ ((dst >> src) & 1));\n");
 	printf ("\tdst &= ~(1 << src);\n");
-	genastore ("dst", curi->dmode, "dstreg", curi->size, "dst");
+	genastore ("dst", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
 	break;
     case i_BSET:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	if (curi->size == sz_byte)
 	    printf ("\tsrc &= 7;\n");
 	else
 	    printf ("\tsrc &= 31;\n");
 	printf ("\tSET_ZFLG (1 ^ ((dst >> src) & 1));\n");
 	printf ("\tdst |= (1 << src);\n");
-	genastore ("dst", curi->dmode, "dstreg", curi->size, "dst");
+	genastore ("dst", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
 	break;
     case i_CMPM:
     case i_CMP:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	genflags (flag_cmp, curi->size, "newv", "src", "dst");
 	break;
     case i_CMPA:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	genflags (flag_cmp, sz_long, "newv", "src", "dst");
 	break;
 	/* The next two are coded a little unconventional, but they are doing
 	 * weird things... */
     case i_MVPRM:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 
 	printf ("\tuaecptr memp = m68k_areg(regs, dstreg) + (uae_s32)(uae_s16)%s;\n", gen_nextiword ());
 	if (curi->size == sz_word) {
@@ -1081,41 +1120,41 @@
 	break;
     case i_MVPMR:
 	printf ("\tuaecptr memp = m68k_areg(regs, srcreg) + (uae_s32)(uae_s16)%s;\n", gen_nextiword ());
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
 	if (curi->size == sz_word) {
 	    printf ("\tuae_u16 val = (get_byte(memp) << 8) + get_byte(memp + 2);\n");
 	} else {
 	    printf ("\tuae_u32 val = (get_byte(memp) << 24) + (get_byte(memp + 2) << 16)\n");
 	    printf ("              + (get_byte(memp + 4) << 8) + get_byte(memp + 6);\n");
 	}
-	genastore ("val", curi->dmode, "dstreg", curi->size, "dst");
+	genastore ("val", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
 	break;
     case i_MOVE:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
 	genflags (flag_logical, curi->size, "src", "", "");
-	genastore ("src", curi->dmode, "dstreg", curi->size, "dst");
+	genastore ("src", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
 	break;
     case i_MOVEA:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
 	if (curi->size == sz_word) {
 	    printf ("\tuae_u32 val = (uae_s32)(uae_s16)src;\n");
 	} else {
 	    printf ("\tuae_u32 val = src;\n");
 	}
-	genastore ("val", curi->dmode, "dstreg", sz_long, "dst");
+	genastore ("val", curi->dmode, "dstreg", sz_long, "dst", xlateflag);
 	break;
     case i_MVSR2:
-	genamode (curi->smode, "srcreg", sz_word, "src", 2, 0);
+	genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
 	printf ("\tMakeSR();\n");
 	if (curi->size == sz_byte)
-	    genastore ("regs.sr & 0xff", curi->smode, "srcreg", sz_word, "src");
+	    genastore ("regs.sr & 0xff", curi->smode, "srcreg", sz_word, "src", xlateflag);
 	else
-	    genastore ("regs.sr", curi->smode, "srcreg", sz_word, "src");
+	    genastore ("regs.sr", curi->smode, "srcreg", sz_word, "src", xlateflag);
 	break;
     case i_MV2SR:
-	genamode (curi->smode, "srcreg", sz_word, "src", 1, 0);
+	genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	if (curi->size == sz_byte)
 	    printf ("\tMakeSR();\n\tregs.sr &= 0xFF00;\n\tregs.sr |= src & 0xFF;\n");
 	else {
@@ -1124,20 +1163,20 @@
 	printf ("\tMakeFromSR();\n");
 	break;
     case i_SWAP:
-	genamode (curi->smode, "srcreg", sz_long, "src", 1, 0);
+	genamode (curi->smode, "srcreg", sz_long, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	printf ("\tuae_u32 dst = ((src >> 16)&0xFFFF) | ((src&0xFFFF)<<16);\n");
 	genflags (flag_logical, sz_long, "dst", "", "");
-	genastore ("dst", curi->smode, "srcreg", sz_long, "src");
+	genastore ("dst", curi->smode, "srcreg", sz_long, "src", xlateflag);
 	break;
     case i_EXG:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
-	genastore ("dst", curi->smode, "srcreg", curi->size, "src");
-	genastore ("src", curi->dmode, "dstreg", curi->size, "dst");
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genastore ("dst", curi->smode, "srcreg", curi->size, "src", xlateflag);
+	genastore ("src", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
 	break;
     case i_EXT:
-	genamode (curi->smode, "srcreg", sz_long, "src", 1, 0);
+	genamode (curi->smode, "srcreg", sz_long, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	switch (curi->size) {
 	case sz_byte: printf ("\tuae_u32 dst = (uae_s32)(uae_s8)src;\n"); break;
@@ -1148,7 +1187,7 @@
 	genflags (flag_logical,
 		  curi->size == sz_word ? sz_word : sz_long, "dst", "", "");
 	genastore ("dst", curi->smode, "srcreg",
-		   curi->size == sz_word ? sz_word : sz_long, "src");
+		   curi->size == sz_word ? sz_word : sz_long, "src", xlateflag);
 	break;
     case i_MVMEL:
 	genmovemel (opcode);
@@ -1157,18 +1196,18 @@
 	genmovemle (opcode);
 	break;
     case i_TRAP:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	sync_m68k_pc ();
 	printf ("\tException(src+32,0);\n");
 	m68k_pc_offset = 0;
 	break;
     case i_MVR2USP:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	printf ("\tregs.usp = src;\n");
 	break;
     case i_MVUSP2R:
-	genamode (curi->smode, "srcreg", curi->size, "src", 2, 0);
-	genastore ("regs.usp", curi->smode, "srcreg", curi->size, "src");
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genastore ("regs.usp", curi->smode, "srcreg", curi->size, "src", xlateflag);
 	break;
     case i_RESET:
 	printf ("\tcustomreset();\n");
@@ -1176,15 +1215,15 @@
     case i_NOP:
 	break;
     case i_STOP:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	printf ("\tregs.sr = src;\n");
 	printf ("\tMakeFromSR();\n");
 	printf ("\tm68k_setstopped(1);\n");
 	break;
     case i_RTE:
 	if (cpu_level == 0) {
-	    genamode (Aipi, "7", sz_word, "sr", 1, 0);
-	    genamode (Aipi, "7", sz_long, "pc", 1, 0);
+	    genamode (Aipi, "7", sz_word, "sr", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	    genamode (Aipi, "7", sz_long, "pc", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	    printf ("\tregs.sr = sr; m68k_setpc_rte(pc);\n");
 	    fill_prefetch_0 ();
 	    printf ("\tMakeFromSR();\n");
@@ -1193,13 +1232,14 @@
 	    if (next_cpu_level < 0)
 		next_cpu_level = 0;
 	    printf ("\tuae_u16 newsr; uae_u32 newpc; for (;;) {\n");
-	    genamode (Aipi, "7", sz_word, "sr", 1, 0);
-	    genamode (Aipi, "7", sz_long, "pc", 1, 0);
-	    genamode (Aipi, "7", sz_word, "format", 1, 0);
+	    genamode (Aipi, "7", sz_word, "sr", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	    genamode (Aipi, "7", sz_long, "pc", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	    genamode (Aipi, "7", sz_word, "format", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	    printf ("\tnewsr = sr; newpc = pc;\n");
 	    printf ("\tif ((format & 0xF000) == 0x0000) { break; }\n");
 	    printf ("\telse if ((format & 0xF000) == 0x1000) { ; }\n");
 	    printf ("\telse if ((format & 0xF000) == 0x2000) { m68k_areg(regs, 7) += 4; break; }\n");
+	    printf ("\telse if ((format & 0xF000) == 0x7000) { in_exception_2--; write_log(\"RTE: 2\\n\"); m68k_areg(regs, 7) += 60; break; }\n");
 	    printf ("\telse if ((format & 0xF000) == 0x8000) { m68k_areg(regs, 7) += 50; break; }\n");
 	    printf ("\telse if ((format & 0xF000) == 0x9000) { m68k_areg(regs, 7) += 12; break; }\n");
 	    printf ("\telse if ((format & 0xF000) == 0xa000) { m68k_areg(regs, 7) += 24; break; }\n");
@@ -1217,8 +1257,8 @@
 	break;
     case i_RTD:
 	printf ("\tcompiler_flush_jsr_stack();\n");
-	genamode (Aipi, "7", sz_long, "pc", 1, 0);
-	genamode (curi->smode, "srcreg", curi->size, "offs", 1, 0);
+	genamode (Aipi, "7", sz_long, "pc", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->smode, "srcreg", curi->size, "offs", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	printf ("\tm68k_areg(regs, 7) += offs;\n");
 	printf ("\tm68k_setpc_rte(pc);\n");
 	fill_prefetch_0 ();
@@ -1226,18 +1266,18 @@
 	m68k_pc_offset = 0;
 	break;
     case i_LINK:
-	genamode (Apdi, "7", sz_long, "old", 2, 0);
-	genamode (curi->smode, "srcreg", sz_long, "src", 1, 0);
-	genastore ("src", Apdi, "7", sz_long, "old");
-	genastore ("m68k_areg(regs, 7)", curi->smode, "srcreg", sz_long, "src");
-	genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0);
+	genamode (Apdi, "7", sz_long, "old", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->smode, "srcreg", sz_long, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genastore ("src", Apdi, "7", sz_long, "old", xlateflag);
+	genastore ("m68k_areg(regs, 7)", curi->smode, "srcreg", sz_long, "src", xlateflag);
+	genamode (curi->dmode, "dstreg", curi->size, "offs", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	printf ("\tm68k_areg(regs, 7) += offs;\n");
 	break;
     case i_UNLK:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	printf ("\tm68k_areg(regs, 7) = src;\n");
-	genamode (Aipi, "7", sz_long, "old", 1, 0);
-	genastore ("old", curi->smode, "srcreg", curi->size, "src");
+	genamode (Aipi, "7", sz_long, "old", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genastore ("old", curi->smode, "srcreg", curi->size, "src", xlateflag);
 	break;
     case i_RTS:
 	printf ("\tm68k_do_rts();\n");
@@ -1252,8 +1292,8 @@
     case i_RTR:
 	printf ("\tcompiler_flush_jsr_stack();\n");
 	printf ("\tMakeSR();\n");
-	genamode (Aipi, "7", sz_word, "sr", 1, 0);
-	genamode (Aipi, "7", sz_long, "pc", 1, 0);
+	genamode (Aipi, "7", sz_word, "sr", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (Aipi, "7", sz_long, "pc", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	printf ("\tregs.sr &= 0xFF00; sr &= 0xFF;\n");
 	printf ("\tregs.sr |= sr; m68k_setpc(pc);\n");
 	fill_prefetch_0 ();
@@ -1261,19 +1301,19 @@
 	m68k_pc_offset = 0;
 	break;
     case i_JSR:
-	genamode (curi->smode, "srcreg", curi->size, "src", 0, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC, XLATE_PHYS);
 	printf ("\tm68k_do_jsr(m68k_getpc() + %d, srca);\n", m68k_pc_offset);
 	fill_prefetch_0 ();
 	m68k_pc_offset = 0;
 	break;
     case i_JMP:
-	genamode (curi->smode, "srcreg", curi->size, "src", 0, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC, XLATE_PHYS);
 	printf ("\tm68k_setpc(srca);\n");
 	fill_prefetch_0 ();
 	m68k_pc_offset = 0;
 	break;
     case i_BSR:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_PHYS);
 	printf ("\tuae_s32 s = (uae_s32)src + 2;\n");
 	if (using_exception_3) {
 	    printf ("\tif (src & 1) {\n");
@@ -1301,7 +1341,7 @@
 		    next_cpu_level = 1;
 	    }
 	}
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_PHYS);
 	printf ("\tif (!cctrue(%d)) goto didnt_jump;\n", curi->cc);
 	if (using_exception_3) {
 	    printf ("\tif (src & 1) {\n");
@@ -1323,21 +1363,21 @@
 	insn_n_cycles = curi->size == sz_byte ? 8 : 12;
 	break;
     case i_LEA:
-	genamode (curi->smode, "srcreg", curi->size, "src", 0, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0);
-	genastore ("srca", curi->dmode, "dstreg", curi->size, "dst");
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genastore ("srca", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
 	break;
     case i_PEA:
-	genamode (curi->smode, "srcreg", curi->size, "src", 0, 0);
-	genamode (Apdi, "7", sz_long, "dst", 2, 0);
-	genastore ("srca", Apdi, "7", sz_long, "dst");
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (Apdi, "7", sz_long, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genastore ("srca", Apdi, "7", sz_long, "dst", xlateflag);
 	break;
     case i_DBcc:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "offs", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "offs", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 
 	printf ("\tif (!cctrue(%d)) {\n", curi->cc);
-	genastore ("(src-1)", curi->smode, "srcreg", curi->size, "src");
+	genastore ("(src-1)", curi->smode, "srcreg", curi->size, "src", xlateflag);
 
 	printf ("\t\tif (src) {\n");
 	if (using_exception_3) {
@@ -1362,15 +1402,15 @@
 	need_endlabel = 1;
 	break;
     case i_Scc:
-	genamode (curi->smode, "srcreg", curi->size, "src", 2, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	printf ("\tint val = cctrue(%d) ? 0xff : 0;\n", curi->cc);
-	genastore ("val", curi->smode, "srcreg", curi->size, "src");
+	genastore ("val", curi->smode, "srcreg", curi->size, "src", xlateflag);
 	break;
     case i_DIVU:
 	printf ("\tuaecptr oldpc = m68k_getpc();\n");
-	genamode (curi->smode, "srcreg", sz_word, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	sync_m68k_pc ();
 	/* Clear V flag when dividing by zero - Alcatraz Odyssey demo depends
 	 * on this (actually, it's doing a DIVS).  */
@@ -1382,7 +1422,7 @@
 	printf ("\tif (newv > 0xffff) { SET_VFLG (1); SET_NFLG (1); SET_CFLG (0); } else\n\t{\n");
 	genflags (flag_logical, sz_word, "newv", "", "");
 	printf ("\tnewv = (newv & 0xffff) | ((uae_u32)rem << 16);\n");
-	genastore ("newv", curi->dmode, "dstreg", sz_long, "dst");
+	genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", xlateflag);
 	printf ("\t}\n");
 	printf ("\t}\n");
 	insn_n_cycles += 136;
@@ -1390,8 +1430,8 @@
 	break;
     case i_DIVS:
 	printf ("\tuaecptr oldpc = m68k_getpc();\n");
-	genamode (curi->smode, "srcreg", sz_word, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", sz_long, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	sync_m68k_pc ();
 	printf ("\tif (src == 0) { SET_VFLG (0); Exception(5,oldpc); goto %s; } else {\n", endlabelstr);
 	printf ("\tuae_s32 newv = (uae_s32)dst / (uae_s32)(uae_s16)src;\n");
@@ -1400,34 +1440,34 @@
 	printf ("\tif (((uae_s16)rem < 0) != ((uae_s32)dst < 0)) rem = -rem;\n");
 	genflags (flag_logical, sz_word, "newv", "", "");
 	printf ("\tnewv = (newv & 0xffff) | ((uae_u32)rem << 16);\n");
-	genastore ("newv", curi->dmode, "dstreg", sz_long, "dst");
+	genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", xlateflag);
 	printf ("\t}\n");
 	printf ("\t}\n");
 	insn_n_cycles += 154;
 	need_endlabel = 1;
 	break;
     case i_MULU:
-	genamode (curi->smode, "srcreg", sz_word, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", sz_word, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", sz_word, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	printf ("\tuae_u32 newv = (uae_u32)(uae_u16)dst * (uae_u32)(uae_u16)src;\n");
 	genflags (flag_logical, sz_long, "newv", "", "");
-	genastore ("newv", curi->dmode, "dstreg", sz_long, "dst");
+	genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", xlateflag);
 	insn_n_cycles += 66;
 	break;
     case i_MULS:
-	genamode (curi->smode, "srcreg", sz_word, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", sz_word, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", sz_word, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	printf ("\tuae_u32 newv = (uae_s32)(uae_s16)dst * (uae_s32)(uae_s16)src;\n");
 	genflags (flag_logical, sz_long, "newv", "", "");
-	genastore ("newv", curi->dmode, "dstreg", sz_long, "dst");
+	genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", xlateflag);
 	insn_n_cycles += 66;
 	break;
     case i_CHK:
 	printf ("\tuaecptr oldpc = m68k_getpc();\n");
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	printf ("\tif ((uae_s32)dst < 0) { SET_NFLG (1); Exception(6,oldpc); goto %s; }\n", endlabelstr);
 	printf ("\telse if (dst > src) { SET_NFLG (0); Exception(6,oldpc); goto %s; }\n", endlabelstr);
 	need_endlabel = 1;
@@ -1435,8 +1475,8 @@
 
     case i_CHK2:
 	printf ("\tuaecptr oldpc = m68k_getpc();\n");
-	genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0);
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
 	printf ("\t{uae_s32 upper,lower,reg = regs.regs[(extra >> 12) & 15];\n");
 	switch (curi->size) {
 	case sz_byte:
@@ -1460,8 +1500,8 @@
 	break;
 
     case i_ASR:
-	genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	switch (curi->size) {
 	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
@@ -1490,11 +1530,11 @@
 	printf ("\t\tval &= %s;\n", bit_mask (curi->size));
 	printf ("\t}\n");
 	genflags (flag_logical_noclobber, curi->size, "val", "", "");
-	genastore ("val", curi->dmode, "dstreg", curi->size, "data");
+	genastore ("val", curi->dmode, "dstreg", curi->size, "data", xlateflag);
 	break;
     case i_ASL:
-	genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	switch (curi->size) {
 	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
@@ -1526,11 +1566,11 @@
 	printf ("\t\tval &= %s;\n", bit_mask (curi->size));
 	printf ("\t}\n");
 	genflags (flag_logical_noclobber, curi->size, "val", "", "");
-	genastore ("val", curi->dmode, "dstreg", curi->size, "data");
+	genastore ("val", curi->dmode, "dstreg", curi->size, "data", xlateflag);
 	break;
     case i_LSR:
-	genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	switch (curi->size) {
 	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
@@ -1555,11 +1595,11 @@
 	printf ("\t\tval >>= 1;\n");
 	printf ("\t}\n");
 	genflags (flag_logical_noclobber, curi->size, "val", "", "");
-	genastore ("val", curi->dmode, "dstreg", curi->size, "data");
+	genastore ("val", curi->dmode, "dstreg", curi->size, "data", xlateflag);
 	break;
     case i_LSL:
-	genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	switch (curi->size) {
 	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
@@ -1585,11 +1625,11 @@
 	printf ("\tval &= %s;\n", bit_mask (curi->size));
 	printf ("\t}\n");
 	genflags (flag_logical_noclobber, curi->size, "val", "", "");
-	genastore ("val", curi->dmode, "dstreg", curi->size, "data");
+	genastore ("val", curi->dmode, "dstreg", curi->size, "data", xlateflag);
 	break;
     case i_ROL:
-	genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	switch (curi->size) {
 	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
@@ -1612,11 +1652,11 @@
 	printf ("\tSET_CFLG (val & 1);\n");
 	printf ("}\n");
 	genflags (flag_logical_noclobber, curi->size, "val", "", "");
-	genastore ("val", curi->dmode, "dstreg", curi->size, "data");
+	genastore ("val", curi->dmode, "dstreg", curi->size, "data", xlateflag);
 	break;
     case i_ROR:
-	genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	switch (curi->size) {
 	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
@@ -1639,11 +1679,11 @@
 	printf ("\tSET_CFLG ((val & %s) >> %d);\n", cmask (curi->size), bit_size (curi->size) - 1);
 	printf ("\t}\n");
 	genflags (flag_logical_noclobber, curi->size, "val", "", "");
-	genastore ("val", curi->dmode, "dstreg", curi->size, "data");
+	genastore ("val", curi->dmode, "dstreg", curi->size, "data", xlateflag);
 	break;
     case i_ROXL:
-	genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	switch (curi->size) {
 	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
@@ -1669,11 +1709,11 @@
 	printf ("\t} }\n");
 	printf ("\tSET_CFLG (GET_XFLG);\n");
 	genflags (flag_logical_noclobber, curi->size, "val", "", "");
-	genastore ("val", curi->dmode, "dstreg", curi->size, "data");
+	genastore ("val", curi->dmode, "dstreg", curi->size, "data", xlateflag);
 	break;
     case i_ROXR:
-	genamode (curi->smode, "srcreg", curi->size, "cnt", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "data", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	switch (curi->size) {
 	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
@@ -1702,10 +1742,10 @@
 	printf ("\t} }\n");
 	printf ("\tSET_CFLG (GET_XFLG);\n");
 	genflags (flag_logical_noclobber, curi->size, "val", "", "");
-	genastore ("val", curi->dmode, "dstreg", curi->size, "data");
+	genastore ("val", curi->dmode, "dstreg", curi->size, "data", xlateflag);
 	break;
     case i_ASRW:
-	genamode (curi->smode, "srcreg", curi->size, "data", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	switch (curi->size) {
 	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
@@ -1719,10 +1759,10 @@
 	genflags (flag_logical, curi->size, "val", "", "");
 	printf ("\tSET_CFLG (cflg);\n");
 	duplicate_carry ();
-	genastore ("val", curi->smode, "srcreg", curi->size, "data");
+	genastore ("val", curi->smode, "srcreg", curi->size, "data", xlateflag);
 	break;
     case i_ASLW:
-	genamode (curi->smode, "srcreg", curi->size, "data", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	switch (curi->size) {
 	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
@@ -1739,10 +1779,10 @@
 	duplicate_carry ();
 
 	printf ("\tSET_VFLG (GET_VFLG | (sign2 != sign));\n");
-	genastore ("val", curi->smode, "srcreg", curi->size, "data");
+	genastore ("val", curi->smode, "srcreg", curi->size, "data", xlateflag);
 	break;
     case i_LSRW:
-	genamode (curi->smode, "srcreg", curi->size, "data", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	switch (curi->size) {
 	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
@@ -1755,10 +1795,10 @@
 	genflags (flag_logical, curi->size, "val", "", "");
 	printf ("SET_CFLG (carry);\n");
 	duplicate_carry ();
-	genastore ("val", curi->smode, "srcreg", curi->size, "data");
+	genastore ("val", curi->smode, "srcreg", curi->size, "data", xlateflag);
 	break;
     case i_LSLW:
-	genamode (curi->smode, "srcreg", curi->size, "data", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	switch (curi->size) {
 	case sz_byte: printf ("\tuae_u8 val = data;\n"); break;
@@ -1771,10 +1811,10 @@
 	genflags (flag_logical, curi->size, "val", "", "");
 	printf ("SET_CFLG (carry >> %d);\n", bit_size (curi->size) - 1);
 	duplicate_carry ();
-	genastore ("val", curi->smode, "srcreg", curi->size, "data");
+	genastore ("val", curi->smode, "srcreg", curi->size, "data", xlateflag);
 	break;
     case i_ROLW:
-	genamode (curi->smode, "srcreg", curi->size, "data", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	switch (curi->size) {
 	case sz_byte: printf ("\tuae_u8 val = data;\n"); break;
@@ -1787,10 +1827,10 @@
 	printf ("\tif (carry)  val |= 1;\n");
 	genflags (flag_logical, curi->size, "val", "", "");
 	printf ("SET_CFLG (carry >> %d);\n", bit_size (curi->size) - 1);
-	genastore ("val", curi->smode, "srcreg", curi->size, "data");
+	genastore ("val", curi->smode, "srcreg", curi->size, "data", xlateflag);
 	break;
     case i_RORW:
-	genamode (curi->smode, "srcreg", curi->size, "data", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	switch (curi->size) {
 	case sz_byte: printf ("\tuae_u8 val = data;\n"); break;
@@ -1803,10 +1843,10 @@
 	printf ("\tif (carry) val |= %s;\n", cmask (curi->size));
 	genflags (flag_logical, curi->size, "val", "", "");
 	printf ("SET_CFLG (carry);\n");
-	genastore ("val", curi->smode, "srcreg", curi->size, "data");
+	genastore ("val", curi->smode, "srcreg", curi->size, "data", xlateflag);
 	break;
     case i_ROXLW:
-	genamode (curi->smode, "srcreg", curi->size, "data", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	switch (curi->size) {
 	case sz_byte: printf ("\tuae_u8 val = data;\n"); break;
@@ -1820,10 +1860,10 @@
 	genflags (flag_logical, curi->size, "val", "", "");
 	printf ("SET_CFLG (carry >> %d);\n", bit_size (curi->size) - 1);
 	duplicate_carry ();
-	genastore ("val", curi->smode, "srcreg", curi->size, "data");
+	genastore ("val", curi->smode, "srcreg", curi->size, "data", xlateflag);
 	break;
     case i_ROXRW:
-	genamode (curi->smode, "srcreg", curi->size, "data", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	switch (curi->size) {
 	case sz_byte: printf ("\tuae_u8 val = data;\n"); break;
@@ -1837,17 +1877,17 @@
 	genflags (flag_logical, curi->size, "val", "", "");
 	printf ("SET_CFLG (carry);\n");
 	duplicate_carry ();
-	genastore ("val", curi->smode, "srcreg", curi->size, "data");
+	genastore ("val", curi->smode, "srcreg", curi->size, "data", xlateflag);
 	break;
     case i_MOVEC2:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	printf ("\tint regno = (src >> 12) & 15;\n");
 	printf ("\tuae_u32 *regp = regs.regs + regno;\n");
 	printf ("\tif (! m68k_movec2(src & 0xFFF, regp)) goto %s;\n", endlabelstr);
 	break;
     case i_MOVE2C:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	printf ("\tint regno = (src >> 12) & 15;\n");
 	printf ("\tuae_u32 *regp = regs.regs + regno;\n");
@@ -1856,8 +1896,8 @@
     case i_CAS:
     {
 	int old_brace_level;
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	printf ("\tint ru = (src >> 6) & 7;\n");
 	printf ("\tint rc = src & 7;\n");
@@ -1865,7 +1905,7 @@
 	printf ("\tif (GET_ZFLG)");
 	old_brace_level = n_braces;
 	start_brace ();
-	genastore ("(m68k_dreg(regs, ru))", curi->dmode, "dstreg", curi->size, "dst");
+	genastore ("(m68k_dreg(regs, ru))", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
 	pop_braces (old_brace_level);
 	printf ("else");
 	start_brace ();
@@ -1874,7 +1914,7 @@
     }
     break;
     case i_CAS2:
-	genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	printf ("\tuae_u32 rn1 = regs.regs[(extra >> 28) & 15];\n");
 	printf ("\tuae_u32 rn2 = regs.regs[(extra >> 12) & 15];\n");
 	if (curi->size == sz_word) {
@@ -1909,31 +1949,41 @@
 	    printf ("\t}\n");
 	}
 	break;
-    case i_MOVES:		/* ignore DFC and SFC because we have no MMU */
+    case i_MOVES:
     {
-	int old_brace_level;
-	genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0);
-	printf ("\tif (extra & 0x800)\n");
-	old_brace_level = n_braces;
-	start_brace ();
-	printf ("\tuae_u32 src = regs.regs[(extra >> 12) & 15];\n");
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 2, 0);
-	genastore ("src", curi->dmode, "dstreg", curi->size, "dst");
-	pop_braces (old_brace_level);
-	printf ("else");
-	start_brace ();
-	genamode (curi->dmode, "dstreg", curi->size, "src", 1, 0);
-	printf ("\tif (extra & 0x8000) {\n");
-	switch (curi->size) {
-	case sz_byte: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = (uae_s32)(uae_s8)src;\n"); break;
-	case sz_word: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = (uae_s32)(uae_s16)src;\n"); break;
-	case sz_long: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = src;\n"); break;
-	default: abort ();
+        int old_brace_level;
+
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace();
+	printf ("\tif (extra & 0x0800)\n");	/* from reg to ea */
+	{
+	    /* use DFC */
+	    old_brace_level = n_braces;
+	    start_brace ();
+	    printf ("\tuae_u32 src = regs.regs[(extra >> 12) & 15];\n");
+	    nexti_no_inc = 1; /* prevent strange problems with misaligned insns */
+	    genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_DFC);
+	    nexti_no_inc = 0;
+	    genastore ("src", curi->dmode, "dstreg", curi->size, "dst", XLATE_DFC);
+	    pop_braces (old_brace_level);
+	}
+	printf ("else");	/* from ea to reg */
+	{
+	    /* use SFC */
+	    start_brace ();
+	    genamode (curi->dmode, "dstreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_SFC);
+	    printf ("\tif (extra & 0x8000) {\n");	/* address/data */
+	    switch (curi->size) {
+	    case sz_byte: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = (uae_s32)(uae_s8)src;\n"); break;
+	    case sz_word: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = (uae_s32)(uae_s16)src;\n"); break;
+	    case sz_long: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = src;\n"); break;
+	    default: abort ();
+	    }
+	    printf ("\t} else {\n");
+	    genastore ("src", Dreg, "(extra >> 12) & 7", curi->size, "", XLATE_LOG);
+	    printf ("\t}\n");
+	    pop_braces (old_brace_level);
 	}
-	printf ("\t} else {\n");
-	genastore ("src", Dreg, "(extra >> 12) & 7", curi->size, "");
-	printf ("\t}\n");
-	pop_braces (old_brace_level);
     }
     break;
     case i_BKPT:		/* only needed for hardware emulators */
@@ -1950,7 +2000,7 @@
 	break;
     case i_TRAPcc:
 	if (curi->smode != am_unknown && curi->smode != am_illg)
-	    genamode (curi->smode, "srcreg", curi->size, "dummy", 1, 0);
+	    genamode (curi->smode, "srcreg", curi->size, "dummy", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	printf ("\tif (cctrue(%d)) { Exception(7,m68k_getpc()); goto %s; }\n", curi->cc, endlabelstr);
 	need_endlabel = 1;
 	break;
@@ -1958,14 +2008,14 @@
 	sync_m68k_pc ();
 	start_brace ();
 	printf ("\tuaecptr oldpc = m68k_getpc();\n");
-	genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	sync_m68k_pc ();
 	printf ("\tm68k_divl(opcode, dst, extra, oldpc);\n");
 	break;
     case i_MULL:
-	genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0);
-	genamode (curi->dmode, "dstreg", curi->size, "dst", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	sync_m68k_pc ();
 	printf ("\tm68k_mull(opcode, dst, extra);\n");
 	break;
@@ -1977,8 +2027,8 @@
     case i_BFFFO:
     case i_BFSET:
     case i_BFINS:
-	genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0);
-	genamode (curi->dmode, "dstreg", sz_long, "dst", 2, 0);
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
 	start_brace ();
 	printf ("\tuae_s32 offset = extra & 0x800 ? m68k_dreg(regs, (extra >> 6) & 7) : (extra >> 6) & 0x1f;\n");
 	printf ("\tint width = (((extra & 0x20 ? m68k_dreg(regs, extra & 7) : extra) -1) & 0x1f) +1;\n");
@@ -2082,23 +2132,23 @@
 	}
 	break;
     case i_TAS:
-	genamode (curi->smode, "srcreg", curi->size, "src", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	genflags (flag_logical, curi->size, "src", "", "");
 	printf ("\tsrc |= 0x80;\n");
-	genastore ("src", curi->smode, "srcreg", curi->size, "src");
+	genastore ("src", curi->smode, "srcreg", curi->size, "src", xlateflag);
 	break;
     case i_FPP:
-	genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	sync_m68k_pc ();
 	printf ("\tfpp_opp(opcode,extra);\n");
 	break;
     case i_FDBcc:
-	genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	sync_m68k_pc ();
 	printf ("\tfdbcc_opp(opcode,extra);\n");
 	break;
     case i_FScc:
-	genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	sync_m68k_pc ();
 	printf ("\tfscc_opp(opcode,extra);\n");
 	break;
@@ -2107,7 +2157,7 @@
 	start_brace ();
 	printf ("\tuaecptr oldpc = m68k_getpc();\n");
 	if (curi->smode != am_unknown && curi->smode != am_illg)
-	    genamode (curi->smode, "srcreg", curi->size, "dummy", 1, 0);
+	    genamode (curi->smode, "srcreg", curi->size, "dummy", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	sync_m68k_pc ();
 	printf ("\tftrapcc_opp(opcode,oldpc);\n");
 	break;
@@ -2115,7 +2165,7 @@
 	sync_m68k_pc ();
 	start_brace ();
 	printf ("\tuaecptr pc = m68k_getpc();\n");
-	genamode (curi->dmode, "srcreg", curi->size, "extra", 1, 0);
+	genamode (curi->dmode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	sync_m68k_pc ();
 	printf ("\tfbcc_opp(opcode,pc,extra);\n");
 	break;
@@ -2150,8 +2200,8 @@
 	     printf ("\tm68k_areg(regs, dstreg) += 16;\n");
 	 } else {
 	     /* Other variants */
-	     genamode (curi->smode, "srcreg", curi->size, "mems", 0, 2);
-	     genamode (curi->dmode, "dstreg", curi->size, "memd", 0, 2);
+	     genamode (curi->smode, "srcreg", curi->size, "mems", GENA_GETV_NO_FETCH, GENA_MOVEM_MOVE16, XLATE_LOG);
+	     genamode (curi->dmode, "dstreg", curi->size, "memd", GENA_GETV_NO_FETCH, GENA_MOVEM_MOVE16, XLATE_LOG);
 	     printf ("\tmemsa &= ~15;\n");
 	     printf ("\tmemda &= ~15;\n");
 	     printf ("\tput_long(memda, get_long(memsa));\n");
@@ -2166,7 +2216,7 @@
 	 break;
 
     case i_MMUOP:
-	genamode (curi->smode, "srcreg", curi->size, "extra", 1, 0);
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
 	sync_m68k_pc ();
 	printf ("\tmmu_op(opcode,extra);\n");
 	break;
@@ -2310,8 +2360,11 @@
 
     using_prefetch = 0;
     using_exception_3 = 0;
+    using_mmu = 0;
+
     for (i = 0; i < 6; i++) {
 	cpu_level = 4 - i;
+	using_mmu = cpu_level == 4;
 	if (i == 5) {
 	    cpu_level = 0;
 	    using_prefetch = 1;
diff -urN src-0.8.22/src/gencpu.c~ src-0.8.22-mmu/src/gencpu.c~
--- src-0.8.22/src/gencpu.c~	1970-01-01 01:00:00.000000000 +0100
+++ src-0.8.22-mmu/src/gencpu.c~	2003-07-25 12:11:11.000000000 +0200
@@ -0,0 +1,2436 @@
+/*
+ * UAE - The Un*x Amiga Emulator
+ *
+ * MC68000 emulation generator
+ *
+ * This is a fairly stupid program that generates a lot of case labels that
+ * can be #included in a switch statement.
+ * As an alternative, it can generate functions that handle specific
+ * MC68000 instructions, plus a prototype header file and a function pointer
+ * array to look up the function for an opcode.
+ * Error checking is bad, an illegal table68k file will cause the program to
+ * call abort().
+ * The generated code is sometimes sub-optimal, an optimizing compiler should
+ * take care of this.
+ *
+ * The source for the insn timings is Markt & Technik's Amiga Magazin 8/1992.
+ *
+ * Copyright 1995, 1996, 1997, 1998, 1999, 2000 Bernd Schmidt
+ * vim:ts=8:sw=4:
+ */
+
+#include "sysconfig.h"
+#include "sysdeps.h"
+#include <ctype.h>
+
+#include "readcpu.h"
+
+#define BOOL_TYPE "int"
+#define VERIFY_MMU_GENAMODE	0
+
+static FILE *headerfile;
+static FILE *stblfile;
+
+static int using_prefetch;
+static int using_exception_3;
+static int using_mmu;
+static int cpu_level;
+
+/* For the current opcode, the next lower level that will have different code.
+ * Initialized to -1 for each opcode. If it remains unchanged, indicates we
+ * are done with that opcode.  */
+static int next_cpu_level;
+
+static int *opcode_map;
+static int *opcode_next_clev;
+static int *opcode_last_postfix;
+static unsigned long *counts;
+
+/* Values for the "getv" (GENA_GETV_*) and "movem" (GENA_MOVEM_*) arguments of genamode().  */
+#define GENA_GETV_NO_FETCH	0
+#define GENA_GETV_FETCH		1
+#define GENA_GETV_FETCH_ALIGN 2
+#define GENA_MOVEM_DO_INC	0
+#define GENA_MOVEM_NO_INC	1
+#define GENA_MOVEM_MOVE16	2
+/* Values for the "xlateflag" arguments; each one is an index into mem_prefix[] below.  */
+#define XLATE_LOG	0
+#define XLATE_PHYS	1
+#define XLATE_SFC	2
+#define XLATE_DFC	3
+static char * mem_prefix[4] = { "", "phys_", "sfc_", "dfc_" };
+
+/* Fill opcode_map[]: first the opcodes listed in the optional "frequent.68k",
+ * then every other one with handler == -1 and mnemo != i_ILLG.  */
+static void read_counts (void)
+{
+    unsigned long opcode, count = 0, total;	/* count: never read unset */
+    char name[20];				/* read with %19s: room for the NUL */
+    int nr = 0;
+    FILE *file = fopen ("frequent.68k", "r");
+
+    memset (counts, 0, 65536 * sizeof *counts);
+    if (file) {
+	fscanf (file, "Total: %lu\n", &total);
+	while (fscanf (file, "%lx: %lu %19s\n", &opcode, &count, name) == 3) {
+	    opcode_next_clev[nr] = 4;
+	    opcode_last_postfix[nr] = -1;
+	    opcode_map[nr++] = opcode;
+	    counts[opcode] = count;
+	}
+	fclose (file);
+    }
+    if (nr == nr_cpuop_funcs)
+	return;
+    for (opcode = 0; opcode < 0x10000; opcode++) {
+	if (table68k[opcode].handler == -1 && table68k[opcode].mnemo != i_ILLG
+	    && counts[opcode] == 0) {
+	    opcode_next_clev[nr] = 4;
+	    opcode_last_postfix[nr] = -1;
+	    opcode_map[nr++] = opcode;
+	    counts[opcode] = count;
+	}
+    }
+    if (nr != nr_cpuop_funcs)
+	abort ();	/* did not collect exactly nr_cpuop_funcs opcodes */
+}
+
+static char endlabelstr[80];	/* label that the generated "goto"s jump to */
+static int endlabelno = 0;
+static int need_endlabel;	/* set once a "goto <endlabelstr>" has been emitted */
+
+static int n_braces = 0;	/* "{" emitted by start_brace() and not yet closed */
+static int m68k_pc_offset = 0;	/* PC offset not yet emitted as m68k_incpc() */
+static int insn_n_cycles;	/* cycle count accumulated for the current opcode */
+
+static void start_brace (void)
+{
+    fputs ("{", stdout);	/* open a block in the generated code... */
+    n_braces += 1;		/* ...and remember that it must be closed */
+}
+
+static void close_brace (void)
+{
+    assert (n_braces > 0);	/* there has to be an open block */
+    n_braces -= 1;
+    fputs ("}", stdout);
+}
+
+static void finish_braces (void)
+{
+    while (n_braces >= 1)	/* close every block that is still open */
+	close_brace ();
+}
+
+static void pop_braces (int to)
+{
+    while (to < n_braces)	/* close blocks until only TO remain open */
+	close_brace ();
+}
+
+/* Number of bits in an operand of SIZE (sz_byte, sz_word or sz_long);
+ * any other value abort()s the generator.  */
+static int bit_size (int size)
+{
+    if (size != sz_byte && size != sz_word && size != sz_long)
+	abort ();
+    if (size == sz_long)
+	return 32;
+    return size == sz_byte ? 8 : 16;
+}
+
+/* C literal masking an operand of SIZE (sz_byte, sz_word or sz_long);
+ * any other value abort()s the generator.  */
+static const char *bit_mask (int size)
+{
+    if (size != sz_byte && size != sz_word && size != sz_long)
+	abort ();
+    if (size == sz_long)
+	return "0xffffffff";
+    return size == sz_byte ? "0xff" : "0xffff";
+}
+
+int nexti_no_inc = 0;	/* nonzero: gen_nextilong/gen_nextiword leave m68k_pc_offset alone */
+
+
+/* Return the C expression that reads the instruction-stream long at the
+ * current PC offset, and add 8 to insn_n_cycles.  The offset then moves on
+ * by 4 unless nexti_no_inc is set.  The result is a static buffer that the
+ * next call overwrites.  */
+static const char *gen_nextilong (void)
+{
+    static char buffer[80];
+    const char *fetch = using_prefetch ? "get_ilong_prefetch" : "get_ilong";
+
+    sprintf (buffer, "%s(%d)", fetch, m68k_pc_offset);
+    if (!nexti_no_inc)
+	m68k_pc_offset += 4;
+    insn_n_cycles += 8;
+
+    return buffer;
+}
+
+/* Return the C expression that reads the instruction-stream word at the
+ * current PC offset, and add 4 to insn_n_cycles.  The offset then moves on
+ * by 2 unless nexti_no_inc is set.  The result is a static buffer that the
+ * next call overwrites.  */
+static const char *gen_nextiword (void)
+{
+    static char buffer[80];
+    const char *fetch = using_prefetch ? "get_iword_prefetch" : "get_iword";
+
+    sprintf (buffer, "%s(%d)", fetch, m68k_pc_offset);
+    if (!nexti_no_inc)
+	m68k_pc_offset += 2;
+    insn_n_cycles += 4;
+
+    return buffer;
+}
+
+/* Return the C expression that reads the instruction-stream byte at the
+ * current PC offset, and add 4 to insn_n_cycles.  The offset always moves
+ * on by 2; nexti_no_inc is not consulted here.  The result is a static
+ * buffer that the next call overwrites.  */
+static const char *gen_nextibyte (void)
+{
+    static char buffer[80];
+    const char *fetch = using_prefetch ? "get_ibyte_prefetch" : "get_ibyte";
+
+    sprintf (buffer, "%s(%d)", fetch, m68k_pc_offset);
+    m68k_pc_offset += 2;
+    insn_n_cycles += 4;
+    return buffer;
+}
+
+static void fill_prefetch_0 (void)
+{
+    if (using_prefetch)	/* emitted for the prefetch variant only */
+	fputs ("fill_prefetch_0 ();\n", stdout);
+}
+
+static void fill_prefetch_2 (void)
+{
+    if (using_prefetch)	/* emitted for the prefetch variant only */
+	fputs ("fill_prefetch_2 ();\n", stdout);
+}
+
+/* Flush the accumulated PC offset: emit one m68k_incpc() for it plus the
+ * matching prefetch refill, then start counting from zero again.  */
+static void sync_m68k_pc (void)
+{
+    int pending = m68k_pc_offset;
+
+    if (pending == 0)
+	return;
+
+    printf ("m68k_incpc(%d);\n", pending);
+    /* An offset of exactly 2 uses fill_prefetch_2, anything else
+     * fill_prefetch_0.  */
+    if (pending == 2)
+	fill_prefetch_2 ();
+    else
+	fill_prefetch_0 ();
+    m68k_pc_offset = 0;
+}
+
+/* getv == GENA_GETV_FETCH: fetch data; getv != GENA_GETV_NO_FETCH: check for
+ * odd address.  If movem != 0, the calling routine handles Apdi and Aipi
+ * modes (GENA_MOVEM_MOVE16: ditto, for MOVE16).  xlateflag: mem_prefix[] index.  */
+static void genamode (amodes mode, char *reg, wordsizes size, char *name, int getv, int movem, int xlateflag)
+
+{
+    /* Outside the MMU variant, always use the phys_ accessors.  */
+	if (!using_mmu)
+		xlateflag = XLATE_PHYS;
+	
+    start_brace ();
+    switch (mode) {
+    case Dreg:
+	if (movem)
+	    abort ();
+	if (getv == GENA_GETV_FETCH)
+	    switch (size) {
+	    case sz_byte:
+#if defined(AMIGA) && !defined(WARPUP)
+		/* sam: I don't know why gcc.2.7.2.1 produces a code worse */
+		/* if it is not done like that: */
+		printf ("\tuae_s8 %s = ((uae_u8*)&m68k_dreg(regs, %s))[3];\n", name, reg);
+#else
+		printf ("\tuae_s8 %s = m68k_dreg(regs, %s);\n", name, reg);
+#endif
+		break;
+	    case sz_word:
+#if defined(AMIGA) && !defined(WARPUP)
+		printf ("\tuae_s16 %s = ((uae_s16*)&m68k_dreg(regs, %s))[1];\n", name, reg);
+#else
+		printf ("\tuae_s16 %s = m68k_dreg(regs, %s);\n", name, reg);
+#endif
+		break;
+	    case sz_long:
+		printf ("\tuae_s32 %s = m68k_dreg(regs, %s);\n", name, reg);
+		break;
+	    default:
+		abort ();
+	    }
+	return;	/* register operand: there is no address variable to set up */
+    case Areg:
+	if (movem)
+	    abort ();
+	if (getv == GENA_GETV_FETCH)
+	    switch (size) {
+	    case sz_word:
+		printf ("\tuae_s16 %s = m68k_areg(regs, %s);\n", name, reg);
+		break;
+	    case sz_long:
+		printf ("\tuae_s32 %s = m68k_areg(regs, %s);\n", name, reg);
+		break;
+	    default:
+		abort ();
+	    }
+	return;	/* register operand: there is no address variable to set up */
+    case Aind:
+	printf ("\tuaecptr %sa = m68k_areg(regs, %s);\n", name, reg);
+	break;
+    case Aipi:
+	printf ("\tuaecptr %sa = m68k_areg(regs, %s);\n", name, reg);
+	break;
+    case Apdi:
+	insn_n_cycles += 2;
+	switch (size) {
+	case sz_byte:
+	    if (movem)
+		printf ("\tuaecptr %sa = m68k_areg(regs, %s);\n", name, reg);
+	    else
+		printf ("\tuaecptr %sa = m68k_areg(regs, %s) - areg_byteinc[%s];\n", name, reg, reg);
+	    break;
+	case sz_word:
+	    printf ("\tuaecptr %sa = m68k_areg(regs, %s) - %d;\n", name, reg, movem ? 0 : 2);
+	    break;
+	case sz_long:
+	    printf ("\tuaecptr %sa = m68k_areg(regs, %s) - %d;\n", name, reg, movem ? 0 : 4);
+	    break;
+	default:
+	    abort ();
+	}
+	break;
+    case Ad16:
+	printf ("\tuaecptr %sa = m68k_areg(regs, %s) + (uae_s32)(uae_s16)%s;\n", name, reg, gen_nextiword ());
+	break;
+    case Ad8r:
+	insn_n_cycles += 2;
+	if (cpu_level > 1) {
+	    if (next_cpu_level < 1)
+		next_cpu_level = 1;
+	    sync_m68k_pc ();
+	    start_brace ();
+	    /* This would ordinarily be done in gen_nextiword, which we bypass.  */
+	    insn_n_cycles += 4;
+	    printf ("\tuaecptr %sa = get_disp_ea_020(m68k_areg(regs, %s), next_iword());\n", name, reg);
+	} else
+	    printf ("\tuaecptr %sa = get_disp_ea_000(m68k_areg(regs, %s), %s);\n", name, reg, gen_nextiword ());
+
+	break;
+    case PC16:
+	printf ("\tuaecptr %sa = m68k_getpc () + %d;\n", name, m68k_pc_offset);
+	printf ("\t%sa += (uae_s32)(uae_s16)%s;\n", name, gen_nextiword ());
+	break;
+    case PC8r:
+	insn_n_cycles += 2;
+	if (cpu_level > 1) {
+	    if (next_cpu_level < 1)
+		next_cpu_level = 1;
+	    sync_m68k_pc ();
+	    start_brace ();
+	    /* This would ordinarily be done in gen_nextiword, which we bypass.  */
+	    insn_n_cycles += 4;
+	    printf ("\tuaecptr tmppc = m68k_getpc();\n");
+	    printf ("\tuaecptr %sa = get_disp_ea_020(tmppc, next_iword());\n", name);
+	} else {
+	    printf ("\tuaecptr tmppc = m68k_getpc() + %d;\n", m68k_pc_offset);
+	    printf ("\tuaecptr %sa = get_disp_ea_000(tmppc, %s);\n", name, gen_nextiword ());
+	}
+
+	break;
+    case absw:
+	printf ("\tuaecptr %sa = (uae_s32)(uae_s16)%s;\n", name, gen_nextiword ());
+	break;
+    case absl:
+	printf ("\tuaecptr %sa = %s;\n", name, gen_nextilong ());
+	break;
+    case imm:
+	if (getv != GENA_GETV_FETCH)
+	    abort ();
+	switch (size) {
+	case sz_byte:
+	    printf ("\tuae_s8 %s = %s;\n", name, gen_nextibyte ());
+	    break;
+	case sz_word:
+	    printf ("\tuae_s16 %s = %s;\n", name, gen_nextiword ());
+	    break;
+	case sz_long:
+	    printf ("\tuae_s32 %s = %s;\n", name, gen_nextilong ());
+	    break;
+	default:
+	    abort ();
+	}
+	return;
+    case imm0:
+	if (getv != GENA_GETV_FETCH)
+	    abort ();
+	printf ("\tuae_s8 %s = %s;\n", name, gen_nextibyte ());
+	return;
+    case imm1:
+	if (getv != GENA_GETV_FETCH)
+	    abort ();
+	printf ("\tuae_s16 %s = %s;\n", name, gen_nextiword ());
+	return;
+    case imm2:
+	if (getv != GENA_GETV_FETCH)
+	    abort ();
+	printf ("\tuae_s32 %s = %s;\n", name, gen_nextilong ());
+	return;
+    case immi:
+	if (getv != GENA_GETV_FETCH)
+	    abort ();
+	printf ("\tuae_u32 %s = %s;\n", name, reg);
+	return;
+    default:
+	abort ();
+    }
+
+    /* We get here for all non-reg non-immediate addressing modes to
+     * actually fetch the value. */
+
+    if (using_exception_3 && getv != GENA_GETV_NO_FETCH && size != sz_byte) {	    
+	printf ("\tif ((%sa & 1) != 0) {\n", name);
+	printf ("\t\tlast_fault_for_exception_3 = %sa;\n", name);
+	printf ("\t\tlast_op_for_exception_3 = opcode;\n");
+	printf ("\t\tlast_addr_for_exception_3 = m68k_getpc() + %d;\n", m68k_pc_offset);
+	printf ("\t\tException(3, 0);\n");
+	printf ("\t\tgoto %s;\n", endlabelstr);
+	printf ("\t}\n");
+	need_endlabel = 1;
+	start_brace ();
+    }
+
+    if (getv == GENA_GETV_FETCH) {
+	switch (size) {
+	case sz_byte: insn_n_cycles += 4; break;
+	case sz_word: insn_n_cycles += 4; break;
+	case sz_long: insn_n_cycles += 8; break;
+	default: abort ();
+	}
+	start_brace ();
+	switch (size) {
+	case sz_byte: printf ("\tuae_s8 %s = %sget_byte(%sa);\n", name, mem_prefix[xlateflag], name); break;
+	case sz_word: printf ("\tuae_s16 %s = %sget_word(%sa);\n", name, mem_prefix[xlateflag], name); break;
+	case sz_long: printf ("\tuae_s32 %s = %sget_long(%sa);\n", name, mem_prefix[xlateflag], name); break;
+	default: abort ();
+	}
+    }
+
+    /* We now might have to fix up the register for pre-dec or post-inc
+     * addressing modes. */
+    if (!movem)
+	switch (mode) {
+	case Aipi:
+	    switch (size) {
+	    case sz_byte:
+		printf ("\tm68k_areg(regs, %s) += areg_byteinc[%s];\n", reg, reg);
+		break;
+	    case sz_word:
+		printf ("\tm68k_areg(regs, %s) += 2;\n", reg);
+		break;
+	    case sz_long:
+		printf ("\tm68k_areg(regs, %s) += 4;\n", reg);
+		break;
+	    default:
+		abort ();
+	    }
+	    break;
+	case Apdi:
+	    printf ("\tm68k_areg (regs, %s) = %sa;\n", reg, name);
+	    break;
+	default:
+	    break;
+	}
+}
+
+/* Emit the C statement that stores a result back into an operand.
+ *   from      - C expression (generated code) holding the value to store
+ *   mode      - addressing mode of the destination operand
+ *   reg       - C expression naming the register number, for Dreg/Areg
+ *   size      - operand size
+ *   to        - base name of the operand; memory modes store to the
+ *               address variable "<to>a"
+ *   xlateflag - XLATE_* index into mem_prefix[], chooses the put_ accessor
+ * Byte and word stores to a data register keep the untouched upper bits; a
+ * word store to an address register is sign-extended to 32 bits.
+ * abort()s on immediate or unknown modes, on sizes the mode does not allow,
+ * and on word/long stores through PC16/PC8r when cpu_level < 2.  */
+static void genastore (char *from, amodes mode, char *reg, wordsizes size, char *to, int xlateflag)
+{
+    const char *width = 0;
+
+    switch (mode) {
+     case Dreg:
+	if (size == sz_long)
+	    printf ("\tm68k_dreg(regs, %s) = (%s);\n", reg, from);
+	else if (size == sz_word)
+	    printf ("\tm68k_dreg(regs, %s) = (m68k_dreg(regs, %s) & ~0xffff) | ((%s) & 0xffff);\n", reg, reg, from);
+	else if (size == sz_byte)
+	    printf ("\tm68k_dreg(regs, %s) = (m68k_dreg(regs, %s) & ~0xff) | ((%s) & 0xff);\n", reg, reg, from);
+	else
+	    abort ();
+	break;
+
+     case Areg:
+	if (size == sz_long) {
+	    printf ("\tm68k_areg(regs, %s) = (%s);\n", reg, from);
+	} else if (size == sz_word) {
+	    write_log ("Foo\n");
+	    printf ("\tm68k_areg(regs, %s) = (uae_s32)(uae_s16)(%s);\n", reg, from);
+	} else {
+	    abort ();
+	}
+	break;
+
+     case Aind:
+     case Aipi:
+     case Apdi:
+     case Ad16:
+     case Ad8r:
+     case absw:
+     case absl:
+     case PC16:
+     case PC8r:
+	if (using_prefetch)
+	    sync_m68k_pc ();
+	if (size == sz_byte)
+	    width = "byte";
+	else if (size == sz_word)
+	    width = "word";
+	else if (size == sz_long)
+	    width = "long";
+	else
+	    abort ();
+	if (size != sz_byte && cpu_level < 2 && (mode == PC16 || mode == PC8r))
+	    abort ();
+	insn_n_cycles += size == sz_long ? 8 : 4;
+	printf ("\t%sput_%s(%sa,%s);\n", mem_prefix[xlateflag], width, to, from);
+	break;
+
+     case imm:
+     case imm0:
+     case imm1:
+     case imm2:
+     case immi:
+     default:
+	/* Nothing can be stored to an immediate or to an unknown mode.  */
+	abort ();
+    }
+}
+
+/* Generate code that loads the registers selected by a MOVEM mask from
+ * memory.  The mask is the extension word; data registers (low mask byte)
+ * are loaded first, then address registers (high byte), word values being
+ * sign-extended.  In Aipi mode the final address is written back.  */
+static void genmovemel (uae_u16 opcode)
+{
+    struct instr *insn = table68k + opcode;
+    int is_long = insn->size == sz_long;
+    int step = is_long ? 4 : 2;
+    int xlateflag = using_mmu ? XLATE_LOG : XLATE_PHYS;
+    char fetch[100];
+
+    /* The expression that reads one register image from srca.  */
+    sprintf (fetch, "%s%sget_%s(srca)", is_long ? "" : "(uae_s32)(uae_s16)",
+	     mem_prefix[xlateflag], is_long ? "long" : "word");
+
+    printf ("\tuae_u16 mask = %s;\n", gen_nextiword ());
+    printf ("\tunsigned int dmask = mask & 0xff, amask = (mask >> 8) & 0xff;\n");
+    genamode (insn->dmode, "dstreg", insn->size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_NO_INC, xlateflag);
+    start_brace ();
+    printf ("\twhile (dmask) { m68k_dreg(regs, movem_index1[dmask]) = %s; srca += %d; dmask = movem_next[dmask]; }\n",
+	    fetch, step);
+    printf ("\twhile (amask) { m68k_areg(regs, movem_index1[amask]) = %s; srca += %d; amask = movem_next[amask]; }\n",
+	    fetch, step);
+    if (insn->dmode == Aipi)
+	printf ("\tm68k_areg(regs, dstreg) = srca;\n");
+}
+
+/* Generate code that stores the registers selected by a MOVEM mask (the
+ * extension word) to memory.  Apdi mode writes address then data registers
+ * at descending addresses and updates the address register afterwards; all
+ * other modes write data then address registers at ascending addresses.  */
+static void genmovemle (uae_u16 opcode)
+{
+    struct instr *insn = table68k + opcode;
+    int step = insn->size == sz_long ? 4 : 2;
+    int xlateflag = using_mmu ? XLATE_LOG : XLATE_PHYS;
+    char store[100];
+
+    /* Start of the store call; the printfs below append value and ")".  */
+    sprintf (store, "%sput_%s(srca,", mem_prefix[xlateflag],
+	     insn->size == sz_long ? "long" : "word");
+
+    printf ("\tuae_u16 mask = %s;\n", gen_nextiword ());
+    genamode (insn->dmode, "dstreg", insn->size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_NO_INC, xlateflag);
+    if (using_prefetch)
+	sync_m68k_pc ();
+
+    start_brace ();
+    if (insn->dmode != Apdi) {
+	printf ("\tuae_u16 dmask = mask & 0xff, amask = (mask >> 8) & 0xff;\n");
+	printf ("\twhile (dmask) { %s m68k_dreg(regs, movem_index1[dmask])); srca += %d; dmask = movem_next[dmask]; }\n",
+		store, step);
+	printf ("\twhile (amask) { %s m68k_areg(regs, movem_index1[amask])); srca += %d; amask = movem_next[amask]; }\n",
+		store, step);
+    } else {
+	printf ("\tuae_u16 amask = mask & 0xff, dmask = (mask >> 8) & 0xff;\n");
+	printf ("\twhile (amask) { srca -= %d; %s m68k_areg(regs, movem_index2[amask])); amask = movem_next[amask]; }\n",
+		step, store);
+	printf ("\twhile (dmask) { srca -= %d; %s m68k_dreg(regs, movem_index2[dmask])); dmask = movem_next[dmask]; }\n",
+		step, store);
+	printf ("\tm68k_areg(regs, dstreg) = srca;\n");
+    }
+}
+
+static void duplicate_carry (void)
+{
+    fputs ("\tCOPY_CARRY;\n", stdout);	/* generated code invokes COPY_CARRY */
+}
+
+typedef enum	/* selects which flag computation genflags() emits */
+{
+  flag_logical_noclobber, flag_logical, flag_add, flag_sub, flag_cmp, flag_addx, flag_subx, flag_zn,
+  flag_av, flag_sv
+}
+flagtypes;
+
+static void genflags_normal (flagtypes type, wordsizes size, char *value, char *src, char *dst)
+{
+    char vstr[100], sstr[100], dstr[100];
+    char usstr[100], udstr[100];
+    char unsstr[100], undstr[100];
+    /* vstr/sstr/dstr: VALUE/SRC/DST cast to the signed type of SIZE; usstr/udstr: SRC/DST cast unsigned.  */
+    switch (size) {
+     case sz_byte:
+	strcpy (vstr, "((uae_s8)(");
+	strcpy (usstr, "((uae_u8)(");
+	break;
+     case sz_word:
+	strcpy (vstr, "((uae_s16)(");
+	strcpy (usstr, "((uae_u16)(");
+	break;
+     case sz_long:
+	strcpy (vstr, "((uae_s32)(");
+	strcpy (usstr, "((uae_u32)(");
+	break;
+     default:
+	abort ();
+    }
+    strcpy (unsstr, usstr);
+
+    strcpy (sstr, vstr);
+    strcpy (dstr, vstr);
+    strcat (vstr, value);
+    strcat (vstr, "))");
+    strcat (dstr, dst);
+    strcat (dstr, "))");
+    strcat (sstr, src);
+    strcat (sstr, "))");
+
+    strcpy (udstr, usstr);
+    strcat (udstr, dst);
+    strcat (udstr, "))");
+    strcat (usstr, src);
+    strcat (usstr, "))");
+    /* undstr: ~DST and unsstr: -SRC, both cast unsigned (unsstr is not used below).  */
+    strcpy (undstr, unsstr);
+    strcat (unsstr, "-");
+    strcat (undstr, "~");
+    strcat (undstr, dst);
+    strcat (undstr, "))");
+    strcat (unsstr, src);
+    strcat (unsstr, "))");
+    /* Add and subtract/compare declare VALUE themselves, inside a new block.  */
+    switch (type) {
+     case flag_logical_noclobber:
+     case flag_logical:
+     case flag_zn:
+     case flag_av:
+     case flag_sv:
+     case flag_addx:
+     case flag_subx:
+	break;
+
+     case flag_add:
+	start_brace ();
+	printf ("uae_u32 %s = %s + %s;\n", value, dstr, sstr);
+	break;
+     case flag_sub:
+     case flag_cmp:
+	start_brace ();
+	printf ("uae_u32 %s = %s - %s;\n", value, dstr, sstr);
+	break;
+    }
+    /* The arithmetic types get the signs of SRC, DST and VALUE as flgs, flgo and flgn.  */
+    switch (type) {
+     case flag_logical_noclobber:
+     case flag_logical:
+     case flag_zn:
+	break;
+
+     case flag_add:
+     case flag_sub:
+     case flag_addx:
+     case flag_subx:
+     case flag_cmp:
+     case flag_av:
+     case flag_sv:
+	start_brace ();
+	printf ("\t" BOOL_TYPE " flgs = %s < 0;\n", sstr);
+	printf ("\t" BOOL_TYPE " flgo = %s < 0;\n", dstr);
+	printf ("\t" BOOL_TYPE " flgn = %s < 0;\n", vstr);
+	break;
+    }
+    /* Finally emit the flag assignments themselves.  */
+    switch (type) {
+     case flag_logical:
+	printf ("\tCLEAR_CZNV;\n");
+	printf ("\tSET_ZFLG (%s == 0);\n", vstr);
+	printf ("\tSET_NFLG (%s < 0);\n", vstr);
+	break;
+     case flag_logical_noclobber:
+	printf ("\tSET_ZFLG (%s == 0);\n", vstr);
+	printf ("\tSET_NFLG (%s < 0);\n", vstr);
+	break;
+     case flag_av:
+	printf ("\tSET_VFLG ((flgs ^ flgn) & (flgo ^ flgn));\n");
+	break;
+     case flag_sv:
+	printf ("\tSET_VFLG ((flgs ^ flgo) & (flgn ^ flgo));\n");
+	break;
+     case flag_zn:
+	printf ("\tSET_ZFLG (GET_ZFLG & (%s == 0));\n", vstr);
+	printf ("\tSET_NFLG (%s < 0);\n", vstr);
+	break;
+     case flag_add:
+	printf ("\tSET_ZFLG (%s == 0);\n", vstr);
+	printf ("\tSET_VFLG ((flgs ^ flgn) & (flgo ^ flgn));\n");
+	printf ("\tSET_CFLG (%s < %s);\n", undstr, usstr);
+	duplicate_carry ();
+	printf ("\tSET_NFLG (flgn != 0);\n");
+	break;
+     case flag_sub:
+	printf ("\tSET_ZFLG (%s == 0);\n", vstr);
+	printf ("\tSET_VFLG ((flgs ^ flgo) & (flgn ^ flgo));\n");
+	printf ("\tSET_CFLG (%s > %s);\n", usstr, udstr);
+	duplicate_carry ();
+	printf ("\tSET_NFLG (flgn != 0);\n");
+	break;
+     case flag_addx:
+	printf ("\tSET_VFLG ((flgs ^ flgn) & (flgo ^ flgn));\n"); /* minterm SON: 0x42 */
+	printf ("\tSET_CFLG (flgs ^ ((flgs ^ flgo) & (flgo ^ flgn)));\n"); /* minterm SON: 0xD4 */
+	duplicate_carry ();
+	break;
+     case flag_subx:
+	printf ("\tSET_VFLG ((flgs ^ flgo) & (flgo ^ flgn));\n"); /* minterm SON: 0x24 */
+	printf ("\tSET_CFLG (flgs ^ ((flgs ^ flgn) & (flgo ^ flgn)));\n"); /* minterm SON: 0xB2 */
+	duplicate_carry ();
+	break;
+     case flag_cmp:
+	printf ("\tSET_ZFLG (%s == 0);\n", vstr);
+	printf ("\tSET_VFLG ((flgs != flgo) && (flgn != flgo));\n");
+	printf ("\tSET_CFLG (%s > %s);\n", usstr, udstr);
+	printf ("\tSET_NFLG (flgn != 0);\n");
+	break;
+    }
+}
+
+/* Emit code that sets the condition codes for a TYPE operation of SIZE whose
+ * result, source and destination are the C expressions VALUE, SRC and DST.  */
+static void genflags (flagtypes type, wordsizes size, char *value, char *src, char *dst)
+{
+    /* Temporarily deleted 68k/ARM flag optimizations.  I'd prefer to have
+       them in the appropriate m68k.h files and use just one copy of this
+       code here.  The API can be changed if necessary.  */
+#ifdef OPTIMIZED_FLAGS
+    switch (type) {
+     case flag_add:
+     case flag_sub:
+	start_brace ();
+	printf ("\tuae_u32 %s;\n", value);
+	break;
+     default:
+	break;
+    }
+
+    /* At least some of those casts are fairly important! */
+    switch (type) {
+     case flag_logical_noclobber:
+	printf ("\t{uae_u32 oldcznv = GET_CZNV & ~(FLAGVAL_Z | FLAGVAL_N);\n");
+	if (strcmp (value, "0") == 0) {
+	    printf ("\tSET_CZNV (oldcznv | FLAGVAL_Z);\n");
+	} else {
+	    switch (size) {
+	     case sz_byte: printf ("\toptflag_testb ((uae_s8)(%s));\n", value); break;
+	     case sz_word: printf ("\toptflag_testw ((uae_s16)(%s));\n", value); break;
+	     case sz_long: printf ("\toptflag_testl ((uae_s32)(%s));\n", value); break;
+	    }
+	    printf ("\tIOR_CZNV (oldcznv);\n");
+	}
+	printf ("\t}\n");
+	return;
+     case flag_logical:
+	if (strcmp (value, "0") == 0) {
+	    printf ("\tSET_CZNV (FLAGVAL_Z);\n");
+	} else {
+	    switch (size) {
+	     case sz_byte: printf ("\toptflag_testb ((uae_s8)(%s));\n", value); break;
+	     case sz_word: printf ("\toptflag_testw ((uae_s16)(%s));\n", value); break;
+	     case sz_long: printf ("\toptflag_testl ((uae_s32)(%s));\n", value); break;
+	    }
+	}
+	return;
+
+     case flag_add:
+	switch (size) {
+	 case sz_byte: printf ("\toptflag_addb (%s, (uae_s8)(%s), (uae_s8)(%s));\n", value, src, dst); break;
+	 case sz_word: printf ("\toptflag_addw (%s, (uae_s16)(%s), (uae_s16)(%s));\n", value, src, dst); break;
+	 case sz_long: printf ("\toptflag_addl (%s, (uae_s32)(%s), (uae_s32)(%s));\n", value, src, dst); break;
+	}
+	return;
+
+     case flag_sub:
+	switch (size) {
+	 case sz_byte: printf ("\toptflag_subb (%s, (uae_s8)(%s), (uae_s8)(%s));\n", value, src, dst); break;
+	 case sz_word: printf ("\toptflag_subw (%s, (uae_s16)(%s), (uae_s16)(%s));\n", value, src, dst); break;
+	 case sz_long: printf ("\toptflag_subl (%s, (uae_s32)(%s), (uae_s32)(%s));\n", value, src, dst); break;
+	}
+	return;
+
+     case flag_cmp:
+	switch (size) {
+	 case sz_byte: printf ("\toptflag_cmpb ((uae_s8)(%s), (uae_s8)(%s));\n", src, dst); break;
+	 case sz_word: printf ("\toptflag_cmpw ((uae_s16)(%s), (uae_s16)(%s));\n", src, dst); break;
+	 case sz_long: printf ("\toptflag_cmpl ((uae_s32)(%s), (uae_s32)(%s));\n", src, dst); break;
+	}
+	return;
+     default:
+	break;
+    }
+#endif
+
+    genflags_normal (type, size, value, src, dst);
+}
+
+/* Emit conditional subtractions bringing the count VAR below 33 (long, given
+ * VAR < 66), 17 (word, VAR < 68) or 9 (byte, VAR < 72); else emit nothing.  */
+static void force_range_for_rox (const char *var, wordsizes size)
+{
+    /* Could do a modulo operation here... which one is faster? */
+    static const char reduce[] = "\tif (%s >= %d) %s -= %d;\n";
+
+    if (size == sz_long) {
+	printf (reduce, var, 33, var, 33);
+    } else if (size == sz_word) {
+	printf (reduce, var, 34, var, 34);
+	printf (reduce, var, 17, var, 17);
+    } else if (size == sz_byte) {
+	printf (reduce, var, 36, var, 36);
+	printf (reduce, var, 18, var, 18);
+	printf (reduce, var, 9, var, 9);
+    }
+}
+
+/* C literal for the top bit of an operand of SIZE; abort()s on other sizes.  */
+static const char *cmask (wordsizes size)
+{
+    if (size != sz_byte && size != sz_word && size != sz_long)
+	abort ();
+    if (size == sz_long)
+	return "0x80000000";
+    return size == sz_byte ? "0x80" : "0x8000";
+}
+
+static int source_is_imm1_8 (struct instr *i)
+{
+    return i->stype != 3 ? 0 : 1;	/* NOTE(review): stype 3 presumably marks a 1..8 immediate -- confirm */
+}
+
+static void gen_opcode (unsigned long int opcode)
+{
+    struct instr *curi = table68k + opcode;
+	int xlateflag = using_mmu ? XLATE_LOG : XLATE_PHYS;
+    insn_n_cycles = 4;
+
+    start_brace ();
+#if 0
+    printf ("uae_u8 *m68k_pc = regs.pc_p;\n");
+#endif
+    m68k_pc_offset = 2;
+    switch (curi->plev) {
+    case 0: /* not privileged */
+	break;
+    case 1: /* unprivileged only on 68000 */
+	if (cpu_level == 0)
+	    break;
+	if (next_cpu_level < 0)
+	    next_cpu_level = 0;
+
+	/* fall through */
+    case 2: /* priviledged */
+	printf ("if (!regs.s) { Exception(8,0); goto %s; }\n", endlabelstr);
+	need_endlabel = 1;
+	start_brace ();
+	break;
+    case 3: /* privileged if size == word */
+	if (curi->size == sz_byte)
+	    break;
+	printf ("if (!regs.s) { Exception(8,0); goto %s; }\n", endlabelstr);
+	need_endlabel = 1;
+	start_brace ();
+	break;
+    }
+    switch (curi->mnemo) {
+    case i_OR:
+    case i_AND:
+    case i_EOR:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	printf ("\tsrc %c= dst;\n", curi->mnemo == i_OR ? '|' : curi->mnemo == i_AND ? '&' : '^');
+	genflags (flag_logical, curi->size, "src", "", "");
+	genastore ("src", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
+	break;
+    case i_ORSR:
+    case i_EORSR:
+	printf ("\tMakeSR();\n");
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	if (curi->size == sz_byte) {
+	    printf ("\tsrc &= 0xFF;\n");
+	}
+	printf ("\tregs.sr %c= src;\n", curi->mnemo == i_EORSR ? '^' : '|');
+	printf ("\tMakeFromSR();\n");
+	break;
+    case i_ANDSR:
+	printf ("\tMakeSR();\n");
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	if (curi->size == sz_byte) {
+	    printf ("\tsrc |= 0xFF00;\n");
+	}
+	printf ("\tregs.sr &= src;\n");
+	printf ("\tMakeFromSR();\n");
+	break;
+    case i_SUB:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	genflags (flag_sub, curi->size, "newv", "src", "dst");
+	genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
+	break;
+    case i_SUBA:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	printf ("\tuae_u32 newv = dst - src;\n");
+	genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", xlateflag);
+	break;
+    case i_SUBX:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	printf ("\tuae_u32 newv = dst - src - (GET_XFLG ? 1 : 0);\n");
+	genflags (flag_subx, curi->size, "newv", "src", "dst");
+	genflags (flag_zn, curi->size, "newv", "", "");
+	genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
+	break;
+    case i_SBCD:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	printf ("\tuae_u16 newv_lo = (dst & 0xF) - (src & 0xF) - (GET_XFLG ? 1 : 0);\n");
+	printf ("\tuae_u16 newv_hi = (dst & 0xF0) - (src & 0xF0);\n");
+	printf ("\tuae_u16 newv, tmp_newv;\n");
+	printf ("\tint bcd = 0;\n");
+	printf ("\tnewv = tmp_newv = newv_hi + newv_lo;\n");
+	printf ("\tif (newv_lo & 0xF0) { newv -= 6; bcd = 6; };\n");
+	printf ("\tif ((((dst & 0xFF) - (src & 0xFF) - (GET_XFLG ? 1 : 0)) & 0x100) > 0xFF) { newv -= 0x60; }\n");
+	printf ("\tSET_CFLG ((((dst & 0xFF) - (src & 0xFF) - bcd - (GET_XFLG ? 1 : 0)) & 0x300) > 0xFF);\n");
+	duplicate_carry ();
+	genflags (flag_zn, curi->size, "newv", "", "");
+	printf ("\tSET_VFLG ((tmp_newv & 0x80) != 0 && (newv & 0x80) == 0);\n");
+	genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
+	break;
+    case i_ADD:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	genflags (flag_add, curi->size, "newv", "src", "dst");
+	genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
+	break;
+    case i_ADDA:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	printf ("\tuae_u32 newv = dst + src;\n");
+	genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", xlateflag);
+	break;
+    case i_ADDX:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	printf ("\tuae_u32 newv = dst + src + (GET_XFLG ? 1 : 0);\n");
+	genflags (flag_addx, curi->size, "newv", "src", "dst");
+	genflags (flag_zn, curi->size, "newv", "", "");
+	genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
+	break;
+    case i_ABCD:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	printf ("\tuae_u16 newv_lo = (src & 0xF) + (dst & 0xF) + (GET_XFLG ? 1 : 0);\n");
+	printf ("\tuae_u16 newv_hi = (src & 0xF0) + (dst & 0xF0);\n");
+	printf ("\tuae_u16 newv, tmp_newv;\n");
+	printf ("\tint cflg;\n");
+	printf ("\tnewv = tmp_newv = newv_hi + newv_lo;");
+	printf ("\tif (newv_lo > 9) { newv += 6; }\n");
+	printf ("\tcflg = (newv & 0x3F0) > 0x90;\n");
+	printf ("\tif (cflg) newv += 0x60;\n");
+	printf ("\tSET_CFLG (cflg);\n");
+	duplicate_carry ();
+	genflags (flag_zn, curi->size, "newv", "", "");
+	printf ("\tSET_VFLG ((tmp_newv & 0x80) == 0 && (newv & 0x80) != 0);\n");
+	genastore ("newv", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
+	break;
+    case i_NEG:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	genflags (flag_sub, curi->size, "dst", "src", "0");
+	genastore ("dst", curi->smode, "srcreg", curi->size, "src", xlateflag);
+	break;
+    case i_NEGX:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	printf ("\tuae_u32 newv = 0 - src - (GET_XFLG ? 1 : 0);\n");
+	genflags (flag_subx, curi->size, "newv", "src", "0");
+	genflags (flag_zn, curi->size, "newv", "", "");
+	genastore ("newv", curi->smode, "srcreg", curi->size, "src", xlateflag);
+	break;
+    case i_NBCD:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	printf ("\tuae_u16 newv_lo = - (src & 0xF) - (GET_XFLG ? 1 : 0);\n");
+	printf ("\tuae_u16 newv_hi = - (src & 0xF0);\n");
+	printf ("\tuae_u16 newv;\n");
+	printf ("\tint cflg;\n");
+	printf ("\tif (newv_lo > 9) { newv_lo -= 6; }\n");
+	printf ("\tnewv = newv_hi + newv_lo;");
+	printf ("\tcflg = (newv & 0x1F0) > 0x90;\n");
+	printf ("\tif (cflg) newv -= 0x60;\n");
+	printf ("\tSET_CFLG (cflg);\n");
+	duplicate_carry();
+	genflags (flag_zn, curi->size, "newv", "", "");
+	genastore ("newv", curi->smode, "srcreg", curi->size, "src", xlateflag);
+	break;
+    case i_CLR:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genflags (flag_logical, curi->size, "0", "", "");
+	genastore ("0", curi->smode, "srcreg", curi->size, "src", xlateflag);
+	break;
+    case i_NOT:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	printf ("\tuae_u32 dst = ~src;\n");
+	genflags (flag_logical, curi->size, "dst", "", "");
+	genastore ("dst", curi->smode, "srcreg", curi->size, "src", xlateflag);
+	break;
+    case i_TST:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genflags (flag_logical, curi->size, "src", "", "");
+	break;
+    case i_BTST:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	if (curi->size == sz_byte)
+	    printf ("\tsrc &= 7;\n");
+	else
+	    printf ("\tsrc &= 31;\n");
+	printf ("\tSET_ZFLG (1 ^ ((dst >> src) & 1));\n");
+	break;
+    case i_BCHG:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	if (curi->size == sz_byte)
+	    printf ("\tsrc &= 7;\n");
+	else
+	    printf ("\tsrc &= 31;\n");
+	printf ("\tdst ^= (1 << src);\n");
+	printf ("\tSET_ZFLG (((uae_u32)dst & (1 << src)) >> src);\n");
+	genastore ("dst", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
+	break;
+    case i_BCLR:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	if (curi->size == sz_byte)
+	    printf ("\tsrc &= 7;\n");
+	else
+	    printf ("\tsrc &= 31;\n");
+	printf ("\tSET_ZFLG (1 ^ ((dst >> src) & 1));\n");
+	printf ("\tdst &= ~(1 << src);\n");
+	genastore ("dst", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
+	break;
+    case i_BSET:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	if (curi->size == sz_byte)
+	    printf ("\tsrc &= 7;\n");
+	else
+	    printf ("\tsrc &= 31;\n");
+	printf ("\tSET_ZFLG (1 ^ ((dst >> src) & 1));\n");
+	printf ("\tdst |= (1 << src);\n");
+	genastore ("dst", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
+	break;
+    case i_CMPM:
+    case i_CMP:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	genflags (flag_cmp, curi->size, "newv", "src", "dst");
+	break;
+    case i_CMPA:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	genflags (flag_cmp, sz_long, "newv", "src", "dst");
+	break;
+	/* The next two are coded a little unconventionally, but they are doing
+	 * weird things... */
+    case i_MVPRM:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+
+	printf ("\tuaecptr memp = m68k_areg(regs, dstreg) + (uae_s32)(uae_s16)%s;\n", gen_nextiword ());
+	if (curi->size == sz_word) {
+	    printf ("\tput_byte(memp, src >> 8); put_byte(memp + 2, src);\n");
+	} else {
+	    printf ("\tput_byte(memp, src >> 24); put_byte(memp + 2, src >> 16);\n");
+	    printf ("\tput_byte(memp + 4, src >> 8); put_byte(memp + 6, src);\n");
+	}
+	break;
+    case i_MVPMR:
+	printf ("\tuaecptr memp = m68k_areg(regs, srcreg) + (uae_s32)(uae_s16)%s;\n", gen_nextiword ());
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
+	if (curi->size == sz_word) {
+	    printf ("\tuae_u16 val = (get_byte(memp) << 8) + get_byte(memp + 2);\n");
+	} else {
+	    printf ("\tuae_u32 val = (get_byte(memp) << 24) + (get_byte(memp + 2) << 16)\n");
+	    printf ("              + (get_byte(memp + 4) << 8) + get_byte(memp + 6);\n");
+	}
+	genastore ("val", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
+	break;
+    case i_MOVE:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genflags (flag_logical, curi->size, "src", "", "");
+	genastore ("src", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
+	break;
+    case i_MOVEA:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
+	if (curi->size == sz_word) {
+	    printf ("\tuae_u32 val = (uae_s32)(uae_s16)src;\n");
+	} else {
+	    printf ("\tuae_u32 val = src;\n");
+	}
+	genastore ("val", curi->dmode, "dstreg", sz_long, "dst", xlateflag);
+	break;
+    case i_MVSR2:
+	genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
+	printf ("\tMakeSR();\n");
+	if (curi->size == sz_byte)
+	    genastore ("regs.sr & 0xff", curi->smode, "srcreg", sz_word, "src", xlateflag);
+	else
+	    genastore ("regs.sr", curi->smode, "srcreg", sz_word, "src", xlateflag);
+	break;
+    case i_MV2SR:
+	genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	if (curi->size == sz_byte)
+	    printf ("\tMakeSR();\n\tregs.sr &= 0xFF00;\n\tregs.sr |= src & 0xFF;\n");
+	else {
+	    printf ("\tregs.sr = src;\n");
+	}
+	printf ("\tMakeFromSR();\n");
+	break;
+    case i_SWAP:
+	genamode (curi->smode, "srcreg", sz_long, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	printf ("\tuae_u32 dst = ((src >> 16)&0xFFFF) | ((src&0xFFFF)<<16);\n");
+	genflags (flag_logical, sz_long, "dst", "", "");
+	genastore ("dst", curi->smode, "srcreg", sz_long, "src", xlateflag);
+	break;
+    case i_EXG:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genastore ("dst", curi->smode, "srcreg", curi->size, "src", xlateflag);
+	genastore ("src", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
+	break;
+    case i_EXT:
+	genamode (curi->smode, "srcreg", sz_long, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	switch (curi->size) {
+	case sz_byte: printf ("\tuae_u32 dst = (uae_s32)(uae_s8)src;\n"); break;
+	case sz_word: printf ("\tuae_u16 dst = (uae_s16)(uae_s8)src;\n"); break;
+	case sz_long: printf ("\tuae_u32 dst = (uae_s32)(uae_s16)src;\n"); break;
+	default: abort ();
+	}
+	genflags (flag_logical,
+		  curi->size == sz_word ? sz_word : sz_long, "dst", "", "");
+	genastore ("dst", curi->smode, "srcreg",
+		   curi->size == sz_word ? sz_word : sz_long, "src", xlateflag);
+	break;
+    case i_MVMEL:
+	genmovemel (opcode);
+	break;
+    case i_MVMLE:
+	genmovemle (opcode);
+	break;
+    case i_TRAP:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	sync_m68k_pc ();
+	printf ("\tException(src+32,0);\n");
+	m68k_pc_offset = 0;
+	break;
+    case i_MVR2USP:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	printf ("\tregs.usp = src;\n");
+	break;
+    case i_MVUSP2R:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genastore ("regs.usp", curi->smode, "srcreg", curi->size, "src", xlateflag);
+	break;
+    case i_RESET:
+	printf ("\tcustomreset();\n");
+	break;
+    case i_NOP:
+	break;
+    case i_STOP:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	printf ("\tregs.sr = src;\n");
+	printf ("\tMakeFromSR();\n");
+	printf ("\tm68k_setstopped(1);\n");
+	break;
+    case i_RTE:
+	if (cpu_level == 0) {
+	    genamode (Aipi, "7", sz_word, "sr", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	    genamode (Aipi, "7", sz_long, "pc", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	    printf ("\tregs.sr = sr; m68k_setpc_rte(pc);\n");
+	    fill_prefetch_0 ();
+	    printf ("\tMakeFromSR();\n");
+	} else {
+	    int old_brace_level = n_braces;
+	    if (next_cpu_level < 0)
+		next_cpu_level = 0;
+	    printf ("\tuae_u16 newsr; uae_u32 newpc; for (;;) {\n");
+	    genamode (Aipi, "7", sz_word, "sr", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	    genamode (Aipi, "7", sz_long, "pc", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	    genamode (Aipi, "7", sz_word, "format", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	    printf ("\tnewsr = sr; newpc = pc;\n");
+	    printf ("\tif ((format & 0xF000) == 0x0000) { break; }\n");
+	    printf ("\telse if ((format & 0xF000) == 0x1000) { ; }\n");
+	    printf ("\telse if ((format & 0xF000) == 0x2000) { m68k_areg(regs, 7) += 4; break; }\n");
+	    printf ("\telse if ((format & 0xF000) == 0x7000) { in_exception_2--; write_log(\"RTE: 2\\n\"); m68k_areg(regs, 7) += 60; break; }\n");
+	    printf ("\telse if ((format & 0xF000) == 0x8000) { m68k_areg(regs, 7) += 50; break; }\n");
+	    printf ("\telse if ((format & 0xF000) == 0x9000) { m68k_areg(regs, 7) += 12; break; }\n");
+	    printf ("\telse if ((format & 0xF000) == 0xa000) { m68k_areg(regs, 7) += 24; break; }\n");
+	    printf ("\telse if ((format & 0xF000) == 0xb000) { m68k_areg(regs, 7) += 84; break; }\n");
+	    printf ("\telse { Exception(14,0); goto %s; }\n", endlabelstr);
+	    printf ("\tregs.sr = newsr; MakeFromSR();\n}\n");
+	    pop_braces (old_brace_level);
+	    printf ("\tregs.sr = newsr; MakeFromSR();\n");
+	    printf ("\tm68k_setpc_rte(newpc);\n");
+	    fill_prefetch_0 ();
+	    need_endlabel = 1;
+	}
+	/* PC is set and prefetch filled. */
+	m68k_pc_offset = 0;
+	break;
+    case i_RTD:
+	printf ("\tcompiler_flush_jsr_stack();\n");
+	genamode (Aipi, "7", sz_long, "pc", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->smode, "srcreg", curi->size, "offs", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	printf ("\tm68k_areg(regs, 7) += offs;\n");
+	printf ("\tm68k_setpc_rte(pc);\n");
+	fill_prefetch_0 ();
+	/* PC is set and prefetch filled. */
+	m68k_pc_offset = 0;
+	break;
+    case i_LINK:
+	genamode (Apdi, "7", sz_long, "old", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->smode, "srcreg", sz_long, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genastore ("src", Apdi, "7", sz_long, "old", xlateflag);
+	genastore ("m68k_areg(regs, 7)", curi->smode, "srcreg", sz_long, "src", xlateflag);
+	genamode (curi->dmode, "dstreg", curi->size, "offs", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	printf ("\tm68k_areg(regs, 7) += offs;\n");
+	break;
+    case i_UNLK:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	printf ("\tm68k_areg(regs, 7) = src;\n");
+	genamode (Aipi, "7", sz_long, "old", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genastore ("old", curi->smode, "srcreg", curi->size, "src", xlateflag);
+	break;
+    case i_RTS:
+	printf ("\tm68k_do_rts();\n");
+	fill_prefetch_0 ();
+	m68k_pc_offset = 0;
+	break;
+    case i_TRAPV:
+	sync_m68k_pc ();
+	printf ("\tif (GET_VFLG) { Exception(7,m68k_getpc()); goto %s; }\n", endlabelstr);
+	need_endlabel = 1;
+	break;
+    case i_RTR:
+	printf ("\tcompiler_flush_jsr_stack();\n");
+	printf ("\tMakeSR();\n");
+	genamode (Aipi, "7", sz_word, "sr", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (Aipi, "7", sz_long, "pc", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	printf ("\tregs.sr &= 0xFF00; sr &= 0xFF;\n");
+	printf ("\tregs.sr |= sr; m68k_setpc(pc);\n");
+	fill_prefetch_0 ();
+	printf ("\tMakeFromSR();\n");
+	m68k_pc_offset = 0;
+	break;
+    case i_JSR:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC, XLATE_PHYS);
+	printf ("\tm68k_do_jsr(m68k_getpc() + %d, srca);\n", m68k_pc_offset);
+	fill_prefetch_0 ();
+	m68k_pc_offset = 0;
+	break;
+    case i_JMP:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC, XLATE_PHYS);
+	printf ("\tm68k_setpc(srca);\n");
+	fill_prefetch_0 ();
+	m68k_pc_offset = 0;
+	break;
+    case i_BSR:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_PHYS);
+	printf ("\tuae_s32 s = (uae_s32)src + 2;\n");
+	if (using_exception_3) {
+	    printf ("\tif (src & 1) {\n");
+	    printf ("\tlast_addr_for_exception_3 = m68k_getpc() + 2;\n");
+	    printf ("\t\tlast_fault_for_exception_3 = m68k_getpc() + s;\n");
+	    printf ("\t\tlast_op_for_exception_3 = opcode; Exception(3,0); goto %s;\n", endlabelstr);
+	    printf ("\t}\n");
+	    need_endlabel = 1;
+	}
+	printf ("\tm68k_do_bsr(m68k_getpc() + %d, s);\n", m68k_pc_offset);
+	fill_prefetch_0 ();
+	m68k_pc_offset = 0;
+	break;
+    case i_Bcc:
+	if (curi->size == sz_long) {
+	    if (cpu_level < 2) {
+		printf ("\tm68k_incpc(2);\n");
+		printf ("\tif (!cctrue(%d)) goto %s;\n", curi->cc, endlabelstr);
+		printf ("\t\tlast_addr_for_exception_3 = m68k_getpc() + 2;\n");
+		printf ("\t\tlast_fault_for_exception_3 = m68k_getpc() + 1;\n");
+		printf ("\t\tlast_op_for_exception_3 = opcode; Exception(3,0); goto %s;\n", endlabelstr);
+		need_endlabel = 1;
+	    } else {
+		if (next_cpu_level < 1)
+		    next_cpu_level = 1;
+	    }
+	}
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_PHYS);
+	printf ("\tif (!cctrue(%d)) goto didnt_jump;\n", curi->cc);
+	if (using_exception_3) {
+	    printf ("\tif (src & 1) {\n");
+	    printf ("\t\tlast_addr_for_exception_3 = m68k_getpc() + 2;\n");
+	    printf ("\t\tlast_fault_for_exception_3 = m68k_getpc() + 2 + (uae_s32)src;\n");
+	    printf ("\t\tlast_op_for_exception_3 = opcode; Exception(3,0); goto %s;\n", endlabelstr);
+	    printf ("\t}\n");
+	    need_endlabel = 1;
+	}
+#ifdef USE_COMPILER
+	printf ("\tm68k_setpc_bcc(m68k_getpc() + 2 + (uae_s32)src);\n");
+#else
+	printf ("\tm68k_incpc ((uae_s32)src + 2);\n");
+#endif
+	fill_prefetch_0 ();
+	printf ("\treturn 5 * CYCLE_UNIT;\n");
+	printf ("didnt_jump:;\n");
+	need_endlabel = 1;
+	insn_n_cycles = curi->size == sz_byte ? 8 : 12;
+	break;
+    case i_LEA:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genastore ("srca", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
+	break;
+    case i_PEA:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_NO_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (Apdi, "7", sz_long, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genastore ("srca", Apdi, "7", sz_long, "dst", xlateflag);
+	break;
+    case i_DBcc:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "offs", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+
+	printf ("\tif (!cctrue(%d)) {\n", curi->cc);
+	genastore ("(src-1)", curi->smode, "srcreg", curi->size, "src", xlateflag);
+
+	printf ("\t\tif (src) {\n");
+	if (using_exception_3) {
+	    printf ("\t\t\tif (offs & 1) {\n");
+	    printf ("\t\t\tlast_addr_for_exception_3 = m68k_getpc() + 2;\n");
+	    printf ("\t\t\tlast_fault_for_exception_3 = m68k_getpc() + 2 + (uae_s32)offs + 2;\n");
+	    printf ("\t\t\tlast_op_for_exception_3 = opcode; Exception(3,0); goto %s;\n", endlabelstr);
+	    printf ("\t\t}\n");
+	    need_endlabel = 1;
+	}
+#ifdef USE_COMPILER
+	printf ("\t\t\tm68k_setpc_bcc(m68k_getpc() + (uae_s32)offs + 2);\n");
+#else
+	printf ("\t\t\tm68k_incpc((uae_s32)offs + 2);\n");
+#endif
+	fill_prefetch_0 ();
+	/* ??? Cycle count is a guess.  */
+	printf ("\t\treturn 6 * CYCLE_UNIT;\n");
+	printf ("\t\t}\n");
+	printf ("\t}\n");
+	insn_n_cycles = 12;
+	need_endlabel = 1;
+	break;
+    case i_Scc:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	printf ("\tint val = cctrue(%d) ? 0xff : 0;\n", curi->cc);
+	genastore ("val", curi->smode, "srcreg", curi->size, "src", xlateflag);
+	break;
+    case i_DIVU:
+	printf ("\tuaecptr oldpc = m68k_getpc();\n");
+	genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	sync_m68k_pc ();
+	/* Clear V flag when dividing by zero - Alcatraz Odyssey demo depends
+	 * on this (actually, it's doing a DIVS).  */
+	printf ("\tif (src == 0) { SET_VFLG (0); Exception (5, oldpc); goto %s; } else {\n", endlabelstr);
+	printf ("\tuae_u32 newv = (uae_u32)dst / (uae_u32)(uae_u16)src;\n");
+	printf ("\tuae_u32 rem = (uae_u32)dst %% (uae_u32)(uae_u16)src;\n");
+	/* The N flag appears to be set each time there is an overflow.
+	 * Weird. */
+	printf ("\tif (newv > 0xffff) { SET_VFLG (1); SET_NFLG (1); SET_CFLG (0); } else\n\t{\n");
+	genflags (flag_logical, sz_word, "newv", "", "");
+	printf ("\tnewv = (newv & 0xffff) | ((uae_u32)rem << 16);\n");
+	genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", xlateflag);
+	printf ("\t}\n");
+	printf ("\t}\n");
+	insn_n_cycles += 136;
+	need_endlabel = 1;
+	break;
+    case i_DIVS:
+	printf ("\tuaecptr oldpc = m68k_getpc();\n");
+	genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	sync_m68k_pc ();
+	printf ("\tif (src == 0) { SET_VFLG (0); Exception(5,oldpc); goto %s; } else {\n", endlabelstr);
+	printf ("\tuae_s32 newv = (uae_s32)dst / (uae_s32)(uae_s16)src;\n");
+	printf ("\tuae_u16 rem = (uae_s32)dst %% (uae_s32)(uae_s16)src;\n");
+	printf ("\tif ((newv & 0xffff8000) != 0 && (newv & 0xffff8000) != 0xffff8000) { SET_VFLG (1); SET_NFLG (1); SET_CFLG (0); } else\n\t{\n");
+	printf ("\tif (((uae_s16)rem < 0) != ((uae_s32)dst < 0)) rem = -rem;\n");
+	genflags (flag_logical, sz_word, "newv", "", "");
+	printf ("\tnewv = (newv & 0xffff) | ((uae_u32)rem << 16);\n");
+	genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", xlateflag);
+	printf ("\t}\n");
+	printf ("\t}\n");
+	insn_n_cycles += 154;
+	need_endlabel = 1;
+	break;
+    case i_MULU:
+	genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", sz_word, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	printf ("\tuae_u32 newv = (uae_u32)(uae_u16)dst * (uae_u32)(uae_u16)src;\n");
+	genflags (flag_logical, sz_long, "newv", "", "");
+	genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", xlateflag);
+	insn_n_cycles += 66;
+	break;
+    case i_MULS:
+	genamode (curi->smode, "srcreg", sz_word, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", sz_word, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	printf ("\tuae_u32 newv = (uae_s32)(uae_s16)dst * (uae_s32)(uae_s16)src;\n");
+	genflags (flag_logical, sz_long, "newv", "", "");
+	genastore ("newv", curi->dmode, "dstreg", sz_long, "dst", xlateflag);
+	insn_n_cycles += 66;
+	break;
+    case i_CHK:
+	printf ("\tuaecptr oldpc = m68k_getpc();\n");
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	printf ("\tif ((uae_s32)dst < 0) { SET_NFLG (1); Exception(6,oldpc); goto %s; }\n", endlabelstr);
+	printf ("\telse if (dst > src) { SET_NFLG (0); Exception(6,oldpc); goto %s; }\n", endlabelstr);
+	need_endlabel = 1;
+	break;
+
+    case i_CHK2:
+	printf ("\tuaecptr oldpc = m68k_getpc();\n");
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
+	printf ("\t{uae_s32 upper,lower,reg = regs.regs[(extra >> 12) & 15];\n");
+	switch (curi->size) {
+	case sz_byte:
+	    printf ("\tlower=(uae_s32)(uae_s8)get_byte(dsta); upper = (uae_s32)(uae_s8)get_byte(dsta+1);\n");
+	    printf ("\tif ((extra & 0x8000) == 0) reg = (uae_s32)(uae_s8)reg;\n");
+	    break;
+	case sz_word:
+	    printf ("\tlower=(uae_s32)(uae_s16)get_word(dsta); upper = (uae_s32)(uae_s16)get_word(dsta+2);\n");
+	    printf ("\tif ((extra & 0x8000) == 0) reg = (uae_s32)(uae_s16)reg;\n");
+	    break;
+	case sz_long:
+	    printf ("\tlower=get_long(dsta); upper = get_long(dsta+4);\n");
+	    break;
+	default:
+	    abort ();
+	}
+	printf ("\tSET_ZFLG (upper == reg || lower == reg);\n");
+	printf ("\tSET_CFLG (lower <= upper ? reg < lower || reg > upper : reg > upper || reg < lower);\n");
+	printf ("\tif ((extra & 0x800) && GET_CFLG) { Exception(6,oldpc); goto %s; }\n}\n", endlabelstr);
+	need_endlabel = 1;
+	break;
+
+    case i_ASR:
+	genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	switch (curi->size) {
+	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
+	case sz_word: printf ("\tuae_u32 val = (uae_u16)data;\n"); break;
+	case sz_long: printf ("\tuae_u32 val = data;\n"); break;
+	default: abort ();
+	}
+	printf ("\tuae_u32 sign = (%s & val) >> %d;\n", cmask (curi->size), bit_size (curi->size) - 1);
+	printf ("\tcnt &= 63;\n");
+	printf ("\tCLEAR_CZNV;\n");
+	printf ("\tif (cnt >= %d) {\n", bit_size (curi->size));
+	printf ("\t\tval = %s & (uae_u32)-sign;\n", bit_mask (curi->size));
+	printf ("\t\tSET_CFLG (sign);\n");
+	duplicate_carry ();
+	if (source_is_imm1_8 (curi))
+	    printf ("\t} else {\n");
+	else
+	    printf ("\t} else if (cnt > 0) {\n");
+	printf ("\t\tval >>= cnt - 1;\n");
+	printf ("\t\tSET_CFLG (val & 1);\n");
+	duplicate_carry ();
+	printf ("\t\tval >>= 1;\n");
+	printf ("\t\tval |= (%s << (%d - cnt)) & (uae_u32)-sign;\n",
+		bit_mask (curi->size),
+		bit_size (curi->size));
+	printf ("\t\tval &= %s;\n", bit_mask (curi->size));
+	printf ("\t}\n");
+	genflags (flag_logical_noclobber, curi->size, "val", "", "");
+	genastore ("val", curi->dmode, "dstreg", curi->size, "data", xlateflag);
+	break;
+    case i_ASL:
+	genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	switch (curi->size) {
+	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
+	case sz_word: printf ("\tuae_u32 val = (uae_u16)data;\n"); break;
+	case sz_long: printf ("\tuae_u32 val = data;\n"); break;
+	default: abort ();
+	}
+	printf ("\tcnt &= 63;\n");
+	printf ("\tCLEAR_CZNV;\n");
+	printf ("\tif (cnt >= %d) {\n", bit_size (curi->size));
+	printf ("\t\tSET_VFLG (val != 0);\n");
+	printf ("\t\tSET_CFLG (cnt == %d ? val & 1 : 0);\n",
+		bit_size (curi->size));
+	duplicate_carry ();
+	printf ("\t\tval = 0;\n");
+	if (source_is_imm1_8 (curi))
+	    printf ("\t} else {\n");
+	else
+	    printf ("\t} else if (cnt > 0) {\n");
+	printf ("\t\tuae_u32 mask = (%s << (%d - cnt)) & %s;\n",
+		bit_mask (curi->size),
+		bit_size (curi->size) - 1,
+		bit_mask (curi->size));
+	printf ("\t\tSET_VFLG ((val & mask) != mask && (val & mask) != 0);\n");
+	printf ("\t\tval <<= cnt - 1;\n");
+	printf ("\t\tSET_CFLG ((val & %s) >> %d);\n", cmask (curi->size), bit_size (curi->size) - 1);
+	duplicate_carry ();
+	printf ("\t\tval <<= 1;\n");
+	printf ("\t\tval &= %s;\n", bit_mask (curi->size));
+	printf ("\t}\n");
+	genflags (flag_logical_noclobber, curi->size, "val", "", "");
+	genastore ("val", curi->dmode, "dstreg", curi->size, "data", xlateflag);
+	break;
+    case i_LSR:
+	genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	switch (curi->size) {
+	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
+	case sz_word: printf ("\tuae_u32 val = (uae_u16)data;\n"); break;
+	case sz_long: printf ("\tuae_u32 val = data;\n"); break;
+	default: abort ();
+	}
+	printf ("\tcnt &= 63;\n");
+	printf ("\tCLEAR_CZNV;\n");
+	printf ("\tif (cnt >= %d) {\n", bit_size (curi->size));
+	printf ("\t\tSET_CFLG ((cnt == %d) & (val >> %d));\n",
+		bit_size (curi->size), bit_size (curi->size) - 1);
+	duplicate_carry ();
+	printf ("\t\tval = 0;\n");
+	if (source_is_imm1_8 (curi))
+	    printf ("\t} else {\n");
+	else
+	    printf ("\t} else if (cnt > 0) {\n");
+	printf ("\t\tval >>= cnt - 1;\n");
+	printf ("\t\tSET_CFLG (val & 1);\n");
+	duplicate_carry ();
+	printf ("\t\tval >>= 1;\n");
+	printf ("\t}\n");
+	genflags (flag_logical_noclobber, curi->size, "val", "", "");
+	genastore ("val", curi->dmode, "dstreg", curi->size, "data", xlateflag);
+	break;
+    case i_LSL:
+	genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	switch (curi->size) {
+	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
+	case sz_word: printf ("\tuae_u32 val = (uae_u16)data;\n"); break;
+	case sz_long: printf ("\tuae_u32 val = data;\n"); break;
+	default: abort ();
+	}
+	printf ("\tcnt &= 63;\n");
+	printf ("\tCLEAR_CZNV;\n");
+	printf ("\tif (cnt >= %d) {\n", bit_size (curi->size));
+	printf ("\t\tSET_CFLG (cnt == %d ? val & 1 : 0);\n",
+		bit_size (curi->size));
+	duplicate_carry ();
+	printf ("\t\tval = 0;\n");
+	if (source_is_imm1_8 (curi))
+	    printf ("\t} else {\n");
+	else
+	    printf ("\t} else if (cnt > 0) {\n");
+	printf ("\t\tval <<= (cnt - 1);\n");
+	printf ("\t\tSET_CFLG ((val & %s) >> %d);\n", cmask (curi->size), bit_size (curi->size) - 1);
+	duplicate_carry ();
+	printf ("\t\tval <<= 1;\n");
+	printf ("\tval &= %s;\n", bit_mask (curi->size));
+	printf ("\t}\n");
+	genflags (flag_logical_noclobber, curi->size, "val", "", "");
+	genastore ("val", curi->dmode, "dstreg", curi->size, "data", xlateflag);
+	break;
+    case i_ROL:
+	genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	switch (curi->size) {
+	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
+	case sz_word: printf ("\tuae_u32 val = (uae_u16)data;\n"); break;
+	case sz_long: printf ("\tuae_u32 val = data;\n"); break;
+	default: abort ();
+	}
+	printf ("\tcnt &= 63;\n");
+	printf ("\tCLEAR_CZNV;\n");
+	if (source_is_imm1_8 (curi))
+	    printf ("{");
+	else
+	    printf ("\tif (cnt > 0) {\n");
+	printf ("\tuae_u32 loval;\n");
+	printf ("\tcnt &= %d;\n", bit_size (curi->size) - 1);
+	printf ("\tloval = val >> (%d - cnt);\n", bit_size (curi->size));
+	printf ("\tval <<= cnt;\n");
+	printf ("\tval |= loval;\n");
+	printf ("\tval &= %s;\n", bit_mask (curi->size));
+	printf ("\tSET_CFLG (val & 1);\n");
+	printf ("}\n");
+	genflags (flag_logical_noclobber, curi->size, "val", "", "");
+	genastore ("val", curi->dmode, "dstreg", curi->size, "data", xlateflag);
+	break;
+    case i_ROR:
+	genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	switch (curi->size) {
+	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
+	case sz_word: printf ("\tuae_u32 val = (uae_u16)data;\n"); break;
+	case sz_long: printf ("\tuae_u32 val = data;\n"); break;
+	default: abort ();
+	}
+	printf ("\tcnt &= 63;\n");
+	printf ("\tCLEAR_CZNV;\n");
+	if (source_is_imm1_8 (curi))
+	    printf ("{");
+	else
+	    printf ("\tif (cnt > 0) {");
+	printf ("\tuae_u32 hival;\n");
+	printf ("\tcnt &= %d;\n", bit_size (curi->size) - 1);
+	printf ("\thival = val << (%d - cnt);\n", bit_size (curi->size));
+	printf ("\tval >>= cnt;\n");
+	printf ("\tval |= hival;\n");
+	printf ("\tval &= %s;\n", bit_mask (curi->size));
+	printf ("\tSET_CFLG ((val & %s) >> %d);\n", cmask (curi->size), bit_size (curi->size) - 1);
+	printf ("\t}\n");
+	genflags (flag_logical_noclobber, curi->size, "val", "", "");
+	genastore ("val", curi->dmode, "dstreg", curi->size, "data", xlateflag);
+	break;
+    case i_ROXL:
+	genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	switch (curi->size) {
+	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
+	case sz_word: printf ("\tuae_u32 val = (uae_u16)data;\n"); break;
+	case sz_long: printf ("\tuae_u32 val = data;\n"); break;
+	default: abort ();
+	}
+	printf ("\tcnt &= 63;\n");
+	printf ("\tCLEAR_CZNV;\n");
+	if (source_is_imm1_8 (curi))
+	    printf ("{");
+	else {
+	    force_range_for_rox ("cnt", curi->size);
+	    printf ("\tif (cnt > 0) {\n");
+	}
+	printf ("\tcnt--;\n");
+	printf ("\t{\n\tuae_u32 carry;\n");
+	printf ("\tuae_u32 loval = val >> (%d - cnt);\n", bit_size (curi->size) - 1);
+	printf ("\tcarry = loval & 1;\n");
+	printf ("\tval = (((val << 1) | GET_XFLG) << cnt) | (loval >> 1);\n");
+	printf ("\tSET_XFLG (carry);\n");
+	printf ("\tval &= %s;\n", bit_mask (curi->size));
+	printf ("\t} }\n");
+	printf ("\tSET_CFLG (GET_XFLG);\n");
+	genflags (flag_logical_noclobber, curi->size, "val", "", "");
+	genastore ("val", curi->dmode, "dstreg", curi->size, "data", xlateflag);
+	break;
+    case i_ROXR:
+	genamode (curi->smode, "srcreg", curi->size, "cnt", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	switch (curi->size) {
+	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
+	case sz_word: printf ("\tuae_u32 val = (uae_u16)data;\n"); break;
+	case sz_long: printf ("\tuae_u32 val = data;\n"); break;
+	default: abort ();
+	}
+	printf ("\tcnt &= 63;\n");
+	printf ("\tCLEAR_CZNV;\n");
+	if (source_is_imm1_8 (curi))
+	    printf ("{");
+	else {
+	    force_range_for_rox ("cnt", curi->size);
+	    printf ("\tif (cnt > 0) {\n");
+	}
+	printf ("\tcnt--;\n");
+	printf ("\t{\n\tuae_u32 carry;\n");
+	printf ("\tuae_u32 hival = (val << 1) | GET_XFLG;\n");
+	printf ("\thival <<= (%d - cnt);\n", bit_size (curi->size) - 1);
+	printf ("\tval >>= cnt;\n");
+	printf ("\tcarry = val & 1;\n");
+	printf ("\tval >>= 1;\n");
+	printf ("\tval |= hival;\n");
+	printf ("\tSET_XFLG (carry);\n");
+	printf ("\tval &= %s;\n", bit_mask (curi->size));
+	printf ("\t} }\n");
+	printf ("\tSET_CFLG (GET_XFLG);\n");
+	genflags (flag_logical_noclobber, curi->size, "val", "", "");
+	genastore ("val", curi->dmode, "dstreg", curi->size, "data", xlateflag);
+	break;
+    case i_ASRW:
+	genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	switch (curi->size) {
+	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
+	case sz_word: printf ("\tuae_u32 val = (uae_u16)data;\n"); break;
+	case sz_long: printf ("\tuae_u32 val = data;\n"); break;
+	default: abort ();
+	}
+	printf ("\tuae_u32 sign = %s & val;\n", cmask (curi->size));
+	printf ("\tuae_u32 cflg = val & 1;\n");
+	printf ("\tval = (val >> 1) | sign;\n");
+	genflags (flag_logical, curi->size, "val", "", "");
+	printf ("\tSET_CFLG (cflg);\n");
+	duplicate_carry ();
+	genastore ("val", curi->smode, "srcreg", curi->size, "data", xlateflag);
+	break;
+    case i_ASLW:
+	genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	switch (curi->size) {
+	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
+	case sz_word: printf ("\tuae_u32 val = (uae_u16)data;\n"); break;
+	case sz_long: printf ("\tuae_u32 val = data;\n"); break;
+	default: abort ();
+	}
+	printf ("\tuae_u32 sign = %s & val;\n", cmask (curi->size));
+	printf ("\tuae_u32 sign2;\n");
+	printf ("\tval <<= 1;\n");
+	genflags (flag_logical, curi->size, "val", "", "");
+	printf ("\tsign2 = %s & val;\n", cmask (curi->size));
+	printf ("\tSET_CFLG (sign != 0);\n");
+	duplicate_carry ();
+
+	printf ("\tSET_VFLG (GET_VFLG | (sign2 != sign));\n");
+	genastore ("val", curi->smode, "srcreg", curi->size, "data", xlateflag);
+	break;
+    case i_LSRW:
+	genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	switch (curi->size) {
+	case sz_byte: printf ("\tuae_u32 val = (uae_u8)data;\n"); break;
+	case sz_word: printf ("\tuae_u32 val = (uae_u16)data;\n"); break;
+	case sz_long: printf ("\tuae_u32 val = data;\n"); break;
+	default: abort ();
+	}
+	printf ("\tuae_u32 carry = val & 1;\n");
+	printf ("\tval >>= 1;\n");
+	genflags (flag_logical, curi->size, "val", "", "");
+	printf ("SET_CFLG (carry);\n");
+	duplicate_carry ();
+	genastore ("val", curi->smode, "srcreg", curi->size, "data", xlateflag);
+	break;
+    case i_LSLW:
+	genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	switch (curi->size) {
+	case sz_byte: printf ("\tuae_u8 val = data;\n"); break;
+	case sz_word: printf ("\tuae_u16 val = data;\n"); break;
+	case sz_long: printf ("\tuae_u32 val = data;\n"); break;
+	default: abort ();
+	}
+	printf ("\tuae_u32 carry = val & %s;\n", cmask (curi->size));
+	printf ("\tval <<= 1;\n");
+	genflags (flag_logical, curi->size, "val", "", "");
+	printf ("SET_CFLG (carry >> %d);\n", bit_size (curi->size) - 1);
+	duplicate_carry ();
+	genastore ("val", curi->smode, "srcreg", curi->size, "data", xlateflag);
+	break;
+    case i_ROLW:
+	genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	switch (curi->size) {
+	case sz_byte: printf ("\tuae_u8 val = data;\n"); break;
+	case sz_word: printf ("\tuae_u16 val = data;\n"); break;
+	case sz_long: printf ("\tuae_u32 val = data;\n"); break;
+	default: abort ();
+	}
+	printf ("\tuae_u32 carry = val & %s;\n", cmask (curi->size));
+	printf ("\tval <<= 1;\n");
+	printf ("\tif (carry)  val |= 1;\n");
+	genflags (flag_logical, curi->size, "val", "", "");
+	printf ("SET_CFLG (carry >> %d);\n", bit_size (curi->size) - 1);
+	genastore ("val", curi->smode, "srcreg", curi->size, "data", xlateflag);
+	break;
+    case i_RORW:
+	genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	switch (curi->size) {
+	case sz_byte: printf ("\tuae_u8 val = data;\n"); break;
+	case sz_word: printf ("\tuae_u16 val = data;\n"); break;
+	case sz_long: printf ("\tuae_u32 val = data;\n"); break;
+	default: abort ();
+	}
+	printf ("\tuae_u32 carry = val & 1;\n");
+	printf ("\tval >>= 1;\n");
+	printf ("\tif (carry) val |= %s;\n", cmask (curi->size));
+	genflags (flag_logical, curi->size, "val", "", "");
+	printf ("SET_CFLG (carry);\n");
+	genastore ("val", curi->smode, "srcreg", curi->size, "data", xlateflag);
+	break;
+    case i_ROXLW:
+	genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	switch (curi->size) {
+	case sz_byte: printf ("\tuae_u8 val = data;\n"); break;
+	case sz_word: printf ("\tuae_u16 val = data;\n"); break;
+	case sz_long: printf ("\tuae_u32 val = data;\n"); break;
+	default: abort ();
+	}
+	printf ("\tuae_u32 carry = val & %s;\n", cmask (curi->size));
+	printf ("\tval <<= 1;\n");
+	printf ("\tif (GET_XFLG) val |= 1;\n");
+	genflags (flag_logical, curi->size, "val", "", "");
+	printf ("SET_CFLG (carry >> %d);\n", bit_size (curi->size) - 1);
+	duplicate_carry ();
+	genastore ("val", curi->smode, "srcreg", curi->size, "data", xlateflag);
+	break;
+    case i_ROXRW:
+	genamode (curi->smode, "srcreg", curi->size, "data", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	switch (curi->size) {
+	case sz_byte: printf ("\tuae_u8 val = data;\n"); break;
+	case sz_word: printf ("\tuae_u16 val = data;\n"); break;
+	case sz_long: printf ("\tuae_u32 val = data;\n"); break;
+	default: abort ();
+	}
+	printf ("\tuae_u32 carry = val & 1;\n");
+	printf ("\tval >>= 1;\n");
+	printf ("\tif (GET_XFLG) val |= %s;\n", cmask (curi->size));
+	genflags (flag_logical, curi->size, "val", "", "");
+	printf ("SET_CFLG (carry);\n");
+	duplicate_carry ();
+	genastore ("val", curi->smode, "srcreg", curi->size, "data", xlateflag);
+	break;
+    case i_MOVEC2:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	printf ("\tint regno = (src >> 12) & 15;\n");
+	printf ("\tuae_u32 *regp = regs.regs + regno;\n");
+	printf ("\tif (! m68k_movec2(src & 0xFFF, regp)) goto %s;\n", endlabelstr);
+	break;
+    case i_MOVE2C:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	printf ("\tint regno = (src >> 12) & 15;\n");
+	printf ("\tuae_u32 *regp = regs.regs + regno;\n");
+	printf ("\tif (! m68k_move2c(src & 0xFFF, regp)) goto %s;\n", endlabelstr);
+	break;
+    case i_CAS:
+    {
+	int old_brace_level;
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	printf ("\tint ru = (src >> 6) & 7;\n");
+	printf ("\tint rc = src & 7;\n");
+	genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, rc)", "dst");
+	printf ("\tif (GET_ZFLG)");
+	old_brace_level = n_braces;
+	start_brace ();
+	genastore ("(m68k_dreg(regs, ru))", curi->dmode, "dstreg", curi->size, "dst", xlateflag);
+	pop_braces (old_brace_level);
+	printf ("else");
+	start_brace ();
+	printf ("m68k_dreg(regs, rc) = dst;\n");
+	pop_braces (old_brace_level);
+    }
+    break;
+    case i_CAS2:
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	printf ("\tuae_u32 rn1 = regs.regs[(extra >> 28) & 15];\n");
+	printf ("\tuae_u32 rn2 = regs.regs[(extra >> 12) & 15];\n");
+	if (curi->size == sz_word) {
+	    int old_brace_level = n_braces;
+	    printf ("\tuae_u16 dst1 = get_word(rn1), dst2 = get_word(rn2);\n");
+	    genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, (extra >> 16) & 7)", "dst1");
+	    printf ("\tif (GET_ZFLG) {\n");
+	    genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, extra & 7)", "dst2");
+	    printf ("\tif (GET_ZFLG) {\n");
+	    printf ("\tput_word(rn1, m68k_dreg(regs, (extra >> 22) & 7));\n");
+	    printf ("\tput_word(rn1, m68k_dreg(regs, (extra >> 6) & 7));\n");
+	    printf ("\t}}\n");
+	    pop_braces (old_brace_level);
+	    printf ("\tif (! GET_ZFLG) {\n");
+	    printf ("\tm68k_dreg(regs, (extra >> 22) & 7) = (m68k_dreg(regs, (extra >> 22) & 7) & ~0xffff) | (dst1 & 0xffff);\n");
+	    printf ("\tm68k_dreg(regs, (extra >> 6) & 7) = (m68k_dreg(regs, (extra >> 6) & 7) & ~0xffff) | (dst2 & 0xffff);\n");
+	    printf ("\t}\n");
+	} else {
+	    int old_brace_level = n_braces;
+	    printf ("\tuae_u32 dst1 = get_long(rn1), dst2 = get_long(rn2);\n");
+	    genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, (extra >> 16) & 7)", "dst1");
+	    printf ("\tif (GET_ZFLG) {\n");
+	    genflags (flag_cmp, curi->size, "newv", "m68k_dreg(regs, extra & 7)", "dst2");
+	    printf ("\tif (GET_ZFLG) {\n");
+	    printf ("\tput_long(rn1, m68k_dreg(regs, (extra >> 22) & 7));\n");
+	    printf ("\tput_long(rn1, m68k_dreg(regs, (extra >> 6) & 7));\n");
+	    printf ("\t}}\n");
+	    pop_braces (old_brace_level);
+	    printf ("\tif (! GET_ZFLG) {\n");
+	    printf ("\tm68k_dreg(regs, (extra >> 22) & 7) = dst1;\n");
+	    printf ("\tm68k_dreg(regs, (extra >> 6) & 7) = dst2;\n");
+	    printf ("\t}\n");
+	}
+	break;
+    case i_MOVES:
+    {
+	int old_brace_level;
+
+		genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+		start_brace();
+		printf ("\tif (extra & 0x0800)\n");	/* from reg to ea */
+		{
+			/* use DFC */
+	old_brace_level = n_braces;
+	start_brace ();
+	printf ("\tuae_u32 src = regs.regs[(extra >> 12) & 15];\n");
+			nexti_no_inc = 1; /* prevent strange problems with misaligned insns */
+			genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_DFC);
+			nexti_no_inc = 0;
+			genastore ("src", curi->dmode, "dstreg", curi->size, "dst", XLATE_DFC);
+	pop_braces (old_brace_level);
+		}
+		printf ("else");	/* from ea to reg */
+		{
+			/* use SFC */
+	start_brace ();
+			genamode (curi->dmode, "dstreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_SFC);
+			printf ("\tif (extra & 0x8000) {\n");	/* address/data */
+	switch (curi->size) {
+	case sz_byte: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = (uae_s32)(uae_s8)src;\n"); break;
+	case sz_word: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = (uae_s32)(uae_s16)src;\n"); break;
+	case sz_long: printf ("\tm68k_areg(regs, (extra >> 12) & 7) = src;\n"); break;
+	default: abort ();
+	}
+	printf ("\t} else {\n");
+			genastore ("src", Dreg, "(extra >> 12) & 7", curi->size, "", XLATE_LOG);
+	printf ("\t}\n");
+	pop_braces (old_brace_level);
+    }
+	}
+    break;
+    case i_BKPT:		/* only needed for hardware emulators */
+	sync_m68k_pc ();
+	printf ("\top_illg(opcode);\n");
+	break;
+    case i_CALLM:		/* not present in 68030 */
+	sync_m68k_pc ();
+	printf ("\top_illg(opcode);\n");
+	break;
+    case i_RTM:		/* not present in 68030 */
+	sync_m68k_pc ();
+	printf ("\top_illg(opcode);\n");
+	break;
+    case i_TRAPcc:
+	if (curi->smode != am_unknown && curi->smode != am_illg)
+	    genamode (curi->smode, "srcreg", curi->size, "dummy", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	printf ("\tif (cctrue(%d)) { Exception(7,m68k_getpc()); goto %s; }\n", curi->cc, endlabelstr);
+	need_endlabel = 1;
+	break;
+    case i_DIVL:
+	sync_m68k_pc ();
+	start_brace ();
+	printf ("\tuaecptr oldpc = m68k_getpc();\n");
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	sync_m68k_pc ();
+	printf ("\tm68k_divl(opcode, dst, extra, oldpc);\n");
+	break;
+    case i_MULL:
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", curi->size, "dst", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	sync_m68k_pc ();
+	printf ("\tm68k_mull(opcode, dst, extra);\n");
+	break;
+    case i_BFTST:
+    case i_BFEXTU:
+    case i_BFCHG:
+    case i_BFEXTS:
+    case i_BFCLR:
+    case i_BFFFO:
+    case i_BFSET:
+    case i_BFINS:
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genamode (curi->dmode, "dstreg", sz_long, "dst", GENA_GETV_FETCH_ALIGN, GENA_MOVEM_DO_INC, XLATE_LOG);
+	start_brace ();
+	printf ("\tuae_s32 offset = extra & 0x800 ? m68k_dreg(regs, (extra >> 6) & 7) : (extra >> 6) & 0x1f;\n");
+	printf ("\tint width = (((extra & 0x20 ? m68k_dreg(regs, extra & 7) : extra) -1) & 0x1f) +1;\n");
+	if (curi->dmode == Dreg) {
+	    printf ("\tuae_u32 tmp = m68k_dreg(regs, dstreg) << (offset & 0x1f);\n");
+	} else {
+	    printf ("\tuae_u32 tmp,bf0,bf1;\n");
+	    printf ("\tdsta += (offset >> 3) | (offset & 0x80000000 ? ~0x1fffffff : 0);\n");
+	    printf ("\tbf0 = get_long(dsta);bf1 = get_byte(dsta+4) & 0xff;\n");
+	    printf ("\ttmp = (bf0 << (offset & 7)) | (bf1 >> (8 - (offset & 7)));\n");
+	}
+	printf ("\ttmp >>= (32 - width);\n");
+	printf ("\tSET_NFLG (tmp & (1 << (width-1)) ? 1 : 0);\n");
+	printf ("\tSET_ZFLG (tmp == 0); SET_VFLG (0); SET_CFLG (0);\n");
+	switch (curi->mnemo) {
+	case i_BFTST:
+	    break;
+	case i_BFEXTU:
+	    printf ("\tm68k_dreg(regs, (extra >> 12) & 7) = tmp;\n");
+	    break;
+	case i_BFCHG:
+	    printf ("\ttmp = ~tmp;\n");
+	    break;
+	case i_BFEXTS:
+	    printf ("\tif (GET_NFLG) tmp |= width == 32 ? 0 : (-1 << width);\n");
+	    printf ("\tm68k_dreg(regs, (extra >> 12) & 7) = tmp;\n");
+	    break;
+	case i_BFCLR:
+	    printf ("\ttmp = 0;\n");
+	    break;
+	case i_BFFFO:
+	    printf ("\t{ uae_u32 mask = 1 << (width-1);\n");
+	    printf ("\twhile (mask) { if (tmp & mask) break; mask >>= 1; offset++; }}\n");
+	    printf ("\tm68k_dreg(regs, (extra >> 12) & 7) = offset;\n");
+	    break;
+	case i_BFSET:
+	    printf ("\ttmp = 0xffffffff;\n");
+	    break;
+	case i_BFINS:
+	    printf ("\ttmp = m68k_dreg(regs, (extra >> 12) & 7);\n");
+	    printf ("\tSET_NFLG (tmp & (1 << (width - 1)) ? 1 : 0);\n");
+	    printf ("\tSET_ZFLG (tmp == 0);\n");
+	    break;
+	default:
+	    break;
+	}
+	if (curi->mnemo == i_BFCHG
+	    || curi->mnemo == i_BFCLR
+	    || curi->mnemo == i_BFSET
+	    || curi->mnemo == i_BFINS)
+	    {
+		printf ("\ttmp <<= (32 - width);\n");
+		if (curi->dmode == Dreg) {
+		    printf ("\tm68k_dreg(regs, dstreg) = (m68k_dreg(regs, dstreg) & ((offset & 0x1f) == 0 ? 0 :\n");
+		    printf ("\t\t(0xffffffff << (32 - (offset & 0x1f))))) |\n");
+		    printf ("\t\t(tmp >> (offset & 0x1f)) |\n");
+		    printf ("\t\t(((offset & 0x1f) + width) >= 32 ? 0 :\n");
+		    printf (" (m68k_dreg(regs, dstreg) & ((uae_u32)0xffffffff >> ((offset & 0x1f) + width))));\n");
+		} else {
+		    printf ("\tbf0 = (bf0 & (0xff000000 << (8 - (offset & 7)))) |\n");
+		    printf ("\t\t(tmp >> (offset & 7)) |\n");
+		    printf ("\t\t(((offset & 7) + width) >= 32 ? 0 :\n");
+		    printf ("\t\t (bf0 & ((uae_u32)0xffffffff >> ((offset & 7) + width))));\n");
+		    printf ("\tput_long(dsta,bf0 );\n");
+		    printf ("\tif (((offset & 7) + width) > 32) {\n");
+		    printf ("\t\tbf1 = (bf1 & (0xff >> (width - 32 + (offset & 7)))) |\n");
+		    printf ("\t\t\t(tmp << (8 - (offset & 7)));\n");
+		    printf ("\t\tput_byte(dsta+4,bf1);\n");
+		    printf ("\t}\n");
+		}
+	    }
+	break;
+    case i_PACK:
+	if (curi->smode == Dreg) {
+	    printf ("\tuae_u16 val = m68k_dreg(regs, srcreg) + %s;\n", gen_nextiword ());
+	    printf ("\tm68k_dreg(regs, dstreg) = (m68k_dreg(regs, dstreg) & 0xffffff00) | ((val >> 4) & 0xf0) | (val & 0xf);\n");
+	} else {
+	    printf ("\tuae_u16 val;\n");
+	    printf ("\tm68k_areg(regs, srcreg) -= areg_byteinc[srcreg];\n");
+	    printf ("\tval = (uae_u16)get_byte(m68k_areg(regs, srcreg));\n");
+	    printf ("\tm68k_areg(regs, srcreg) -= areg_byteinc[srcreg];\n");
+	    printf ("\tval = (val | ((uae_u16)get_byte(m68k_areg(regs, srcreg)) << 8)) + %s;\n", gen_nextiword ());
+	    printf ("\tm68k_areg(regs, dstreg) -= areg_byteinc[dstreg];\n");
+	    printf ("\tput_byte(m68k_areg(regs, dstreg),((val >> 4) & 0xf0) | (val & 0xf));\n");
+	}
+	break;
+    case i_UNPK:
+	if (curi->smode == Dreg) {
+	    printf ("\tuae_u16 val = m68k_dreg(regs, srcreg);\n");
+	    printf ("\tval = (((val << 4) & 0xf00) | (val & 0xf)) + %s;\n", gen_nextiword ());
+	    printf ("\tm68k_dreg(regs, dstreg) = (m68k_dreg(regs, dstreg) & 0xffff0000) | (val & 0xffff);\n");
+	} else {
+	    printf ("\tuae_u16 val;\n");
+	    printf ("\tm68k_areg(regs, srcreg) -= areg_byteinc[srcreg];\n");
+	    printf ("\tval = (uae_u16)get_byte(m68k_areg(regs, srcreg));\n");
+	    printf ("\tval = (((val << 4) & 0xf00) | (val & 0xf)) + %s;\n", gen_nextiword ());
+	    printf ("\tm68k_areg(regs, dstreg) -= areg_byteinc[dstreg];\n");
+	    printf ("\tput_byte(m68k_areg(regs, dstreg),val);\n");
+	    printf ("\tm68k_areg(regs, dstreg) -= areg_byteinc[dstreg];\n");
+	    printf ("\tput_byte(m68k_areg(regs, dstreg),val >> 8);\n");
+	}
+	break;
+    case i_TAS:
+	genamode (curi->smode, "srcreg", curi->size, "src", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	genflags (flag_logical, curi->size, "src", "", "");
+	printf ("\tsrc |= 0x80;\n");
+	genastore ("src", curi->smode, "srcreg", curi->size, "src", xlateflag);
+	break;
+    case i_FPP:
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	sync_m68k_pc ();
+	printf ("\tfpp_opp(opcode,extra);\n");
+	break;
+    case i_FDBcc:
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	sync_m68k_pc ();
+	printf ("\tfdbcc_opp(opcode,extra);\n");
+	break;
+    case i_FScc:
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	sync_m68k_pc ();
+	printf ("\tfscc_opp(opcode,extra);\n");
+	break;
+    case i_FTRAPcc:
+	sync_m68k_pc ();
+	start_brace ();
+	printf ("\tuaecptr oldpc = m68k_getpc();\n");
+	if (curi->smode != am_unknown && curi->smode != am_illg)
+	    genamode (curi->smode, "srcreg", curi->size, "dummy", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	sync_m68k_pc ();
+	printf ("\tftrapcc_opp(opcode,oldpc);\n");
+	break;
+    case i_FBcc:
+	sync_m68k_pc ();
+	start_brace ();
+	printf ("\tuaecptr pc = m68k_getpc();\n");
+	genamode (curi->dmode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	sync_m68k_pc ();
+	printf ("\tfbcc_opp(opcode,pc,extra);\n");
+	break;
+    case i_FSAVE:
+	sync_m68k_pc ();
+	printf ("\tfsave_opp(opcode);\n");
+	break;
+    case i_FRESTORE:
+	sync_m68k_pc ();
+	printf ("\tfrestore_opp(opcode);\n");
+	break;
+
+     case i_CINVL:
+     case i_CINVP:
+     case i_CINVA:
+     case i_CPUSHL:
+     case i_CPUSHP:
+     case i_CPUSHA:
+	break;
+     case i_MOVE16:
+	 if ((opcode & 0xfff8) == 0xf620) {
+	     /* MOVE16 (Ax)+,(Ay)+ */
+	     printf ("\tuaecptr mems = m68k_areg(regs, srcreg) & ~15, memd;\n");
+	     printf ("\tdstreg = (%s >> 12) & 7;\n", gen_nextiword());
+	     printf ("\tmemd = m68k_areg(regs, dstreg) & ~15;\n");
+	     printf ("\tput_long(memd, get_long(mems));\n");
+	     printf ("\tput_long(memd+4, get_long(mems+4));\n");
+	     printf ("\tput_long(memd+8, get_long(mems+8));\n");
+	     printf ("\tput_long(memd+12, get_long(mems+12));\n");
+	     printf ("\tif (srcreg != dstreg)\n");
+	     printf ("\tm68k_areg(regs, srcreg) += 16;\n");
+	     printf ("\tm68k_areg(regs, dstreg) += 16;\n");
+	 } else {
+	     /* Other variants */
+	     genamode (curi->smode, "srcreg", curi->size, "mems", GENA_GETV_NO_FETCH, GENA_MOVEM_MOVE16, XLATE_LOG);
+	     genamode (curi->dmode, "dstreg", curi->size, "memd", GENA_GETV_NO_FETCH, GENA_MOVEM_MOVE16, XLATE_LOG);
+	     printf ("\tmemsa &= ~15;\n");
+	     printf ("\tmemda &= ~15;\n");
+	     printf ("\tput_long(memda, get_long(memsa));\n");
+	     printf ("\tput_long(memda+4, get_long(memsa+4));\n");
+	     printf ("\tput_long(memda+8, get_long(memsa+8));\n");
+	     printf ("\tput_long(memda+12, get_long(memsa+12));\n");
+	     if ((opcode & 0xfff8) == 0xf600)
+                 printf ("\tm68k_areg(regs, srcreg) += 16;\n");
+	     else if ((opcode & 0xfff8) == 0xf608)
+		 printf ("\tm68k_areg(regs, dstreg) += 16;\n");
+	 }
+	 break;
+
+    case i_MMUOP:
+	genamode (curi->smode, "srcreg", curi->size, "extra", GENA_GETV_FETCH, GENA_MOVEM_DO_INC, XLATE_LOG);
+	sync_m68k_pc ();
+	printf ("\tmmu_op(opcode,extra);\n");
+	break;
+    default:
+	abort ();
+	break;
+    }
+    finish_braces ();
+    sync_m68k_pc ();
+}
+
+/* Write to F the #include lines and the CPUFUNC/NOFLAGS preamble shared by the generated C sources.  */
+static void generate_includes (FILE * f)
+{
+    static const char *const include_lines[] = {
+	"#include \"sysconfig.h\"\n", "#include \"sysdeps.h\"\n",
+	"#include \"config.h\"\n", "#include \"options.h\"\n",
+	"#include \"memory.h\"\n", "#include \"custom.h\"\n",
+	"#include \"newcpu.h\"\n", "#include \"compiler.h\"\n",
+	"#include \"cputbl.h\"\n"
+    };
+    unsigned int n;
+
+    for (n = 0; n < sizeof include_lines / sizeof include_lines[0]; n++)
+	fputs (include_lines[n], f);	/* emitted in the order listed above */
+    /* CPUFUNC(x) appends _ff to x; noflags.h is pulled in only when NOFLAGS is defined.  */
+    fputs ("#define CPUFUNC(x) x##_ff\n" "#ifdef NOFLAGS\n" "#include \"noflags.h\"\n" "#endif\n", f);
+}
+
+static int postfix;	/* numeric suffix N used in op_<opcode>_N and op_smalltbl_N for the pass being generated; set by generate_func */
+
+/* Emit the handler for opcode_map[rp] at the current cpu_level: body on stdout, table entry in stblfile, prototypes in headerfile.  */
+static void generate_one_opcode (int rp)
+{
+    long int opcode = opcode_map[rp];
+    uae_u16 smsk, dmsk = 7;
+    int idx = 0;
+
+    /* Illegal opcodes and opcodes above this CPU level produce no output.  */
+    if (table68k[opcode].mnemo == i_ILLG)
+	return;
+    if (table68k[opcode].clev > cpu_level)
+	return;
+
+    /* Look up the mnemonic's name; idx rests on the empty-name terminator when nothing matches.  */
+    while (lookuptab[idx].name[0] && table68k[opcode].mnemo != lookuptab[idx].mnemo)
+	idx++;
+
+    if (table68k[opcode].handler != -1)
+	return;
+
+    if (opcode_next_clev[rp] != cpu_level) {
+	/* No new body at this level: the table entry names the function recorded in opcode_last_postfix.  */
+	fprintf (stblfile, "{ CPUFUNC(op_%lx_%d), 0, %ld }, /* %s */\n", opcode, opcode_last_postfix[rp],
+		 opcode, lookuptab[idx].name);
+	return;
+    }
+
+    /* Table entry, both prototypes, then the opening line of the function definition.  */
+    fprintf (stblfile, "{ CPUFUNC(op_%lx_%d), 0, %ld }, /* %s */\n", opcode, postfix, opcode, lookuptab[idx].name);
+    fprintf (headerfile, "extern cpuop_func op_%lx_%d_nf;\n", opcode, postfix);
+    fprintf (headerfile, "extern cpuop_func op_%lx_%d_ff;\n", opcode, postfix);
+    printf ("unsigned long REGPARAM2 CPUFUNC(op_%lx_%d)(uae_u32 opcode) /* %s */\n{\n", opcode, postfix, lookuptab[idx].name);
+
+    /* smsk masks the opcode bits that become srcreg; unknown stype values are fatal.  */
+    switch (table68k[opcode].stype) {
+    case 0: case 3: case 4: smsk = 7; break;
+    case 1: smsk = 255; break;
+    case 2: smsk = 15; break;
+    case 5: smsk = 63; break;
+    case 7: smsk = 3; break;
+    default: abort ();
+    }
+
+    next_cpu_level = -1;
+    if (table68k[opcode].suse
+	&& !(table68k[opcode].smode == imm || table68k[opcode].smode == imm0
+	     || table68k[opcode].smode == imm1 || table68k[opcode].smode == imm2
+	     || table68k[opcode].smode == absw || table68k[opcode].smode == absl
+	     || table68k[opcode].smode == PC8r || table68k[opcode].smode == PC16))
+    {
+	if (table68k[opcode].spos != -1) {
+	    /* Register number is extracted from the opcode word by the generated code.  */
+	    char source[100];
+	    int pos = table68k[opcode].spos;
+
+	    if (pos == 0)
+		sprintf (source, "(opcode & %d)", smsk);
+	    else
+		sprintf (source, "((opcode >> %d) & %d)", pos, smsk);
+	    switch (table68k[opcode].stype) {
+	    case 3:
+		printf ("\tuae_u32 srcreg = imm8_table[%s];\n", source);
+		break;
+	    case 1:
+		printf ("\tuae_u32 srcreg = (uae_s32)(uae_s8)%s;\n", source);
+		break;
+	    default:
+		printf ("\tuae_u32 srcreg = %s;\n", source);
+		break;
+	    }
+	} else if (((int) table68k[opcode].sreg) >= 128)
+	    printf ("\tuae_u32 srcreg = (uae_s32)(uae_s8)%d;\n", (int) table68k[opcode].sreg);
+	else
+	    printf ("\tuae_u32 srcreg = %d;\n", (int) table68k[opcode].sreg);
+    }
+    if (table68k[opcode].duse
+	/* Yes, the dmode can be imm, in case of LINK or DBcc */
+	&& !(table68k[opcode].dmode == imm || table68k[opcode].dmode == imm0
+	     || table68k[opcode].dmode == imm1 || table68k[opcode].dmode == imm2
+	     || table68k[opcode].dmode == absw || table68k[opcode].dmode == absl))
+    {
+	int pos = table68k[opcode].dpos;
+
+	if (pos == -1 && ((int) table68k[opcode].dreg) >= 128)
+	    printf ("\tuae_u32 dstreg = (uae_s32)(uae_s8)%d;\n", (int) table68k[opcode].dreg);
+	else if (pos == -1)
+	    printf ("\tuae_u32 dstreg = %d;\n", (int) table68k[opcode].dreg);
+	else if (pos)
+	    printf ("\tuae_u32 dstreg = (opcode >> %d) & %d;\n", pos, dmsk);
+	else
+	    printf ("\tuae_u32 dstreg = opcode & %d;\n", dmsk);
+    }
+
+    /* Fresh end label; it is emitted only if gen_opcode flagged need_endlabel.  */
+    need_endlabel = 0;
+    endlabelno++;
+    sprintf (endlabelstr, "endlabel%d", endlabelno);
+    gen_opcode (opcode);
+    if (need_endlabel)
+	printf ("%s: ;\n", endlabelstr);
+    printf ("return %d;\n", insn_n_cycles * CYCLE_UNIT / 2);
+    printf ("}\n");
+    opcode_next_clev[rp] = next_cpu_level;
+    opcode_last_postfix[rp] = postfix;
+}
+
+/* Run the six generation passes: postfix 0-4 cover CPU levels 4 down to 0, postfix 5 redoes level 0 with prefetch and exception 3.  */
+static void generate_func (void)
+{
+    int pass, part, rp;
+
+    using_prefetch = 0;
+    using_exception_3 = 0;
+    using_mmu = 0;
+
+    for (pass = 0; pass < 6; pass++) {
+	if (pass < 5) {
+	    cpu_level = 4 - pass;
+	} else {
+	    cpu_level = 0;
+	    using_prefetch = 1;
+	    using_exception_3 = 1;
+	    for (rp = 0; rp < nr_cpuop_funcs; rp++)
+		opcode_next_clev[rp] = 0;	/* matches cpu_level, so no earlier body is reused */
+	}
+	using_mmu = (pass == 0);	/* i.e. only while cpu_level is 4 */
+	postfix = pass;
+	fprintf (stblfile, "struct cputbl CPUFUNC(op_smalltbl_%d)[] = {\n", postfix);
+
+	/* sam: this is for people with low memory (eg. me :)) */
+	fputs ("\n"
+	       "#if !defined(PART_1) && !defined(PART_2) && "
+	       "!defined(PART_3) && !defined(PART_4) && "
+	       "!defined(PART_5) && !defined(PART_6) && "
+	       "!defined(PART_7) && !defined(PART_8)"
+	       "\n"
+	       "#define PART_1 1\n"
+	       "#define PART_2 1\n"
+	       "#define PART_3 1\n"
+	       "#define PART_4 1\n"
+	       "#define PART_5 1\n"
+	       "#define PART_6 1\n"
+	       "#define PART_7 1\n"
+	       "#define PART_8 1\n"
+	       "#endif\n\n", stdout);
+
+	/* Eight contiguous slices of the opcode list, each wrapped in its own PART_n guard.  */
+	rp = 0;
+	for (part = 1; part <= 8; part++) {
+	    const int limit = (part * nr_cpuop_funcs) / 8;
+	    printf ("#ifdef PART_%d\n", part);
+	    while (rp < limit)
+		generate_one_opcode (rp++);
+	    printf ("#endif\n\n");
+	}
+	fprintf (stblfile, "{ 0, 0, 0 }};\n");
+    }
+}
+
+/* Generator entry point: prepares the opcode data (read_table68k, do_merges, read_counts), then writes cputbl.h, cpustbl.c and cpuemu.c; returns 1 if one of them cannot be created.  */
+int main (int argc, char **argv)
+{
+    read_table68k ();
+    do_merges ();
+    opcode_map = (int *) xmalloc (sizeof (int) * nr_cpuop_funcs);
+    opcode_last_postfix = (int *) xmalloc (sizeof (int) * nr_cpuop_funcs);
+    opcode_next_clev = (int *) xmalloc (sizeof (int) * nr_cpuop_funcs);
+    counts = (unsigned long *) xmalloc (65536 * sizeof (unsigned long));
+    read_counts ();
+
+    /* It would be a lot nicer to put all in one file (we'd also get rid of
+     * cputbl.h that way), but cpuopti can't cope.  That could be fixed, but
+     * I don't dare to touch the 68k version.  */
+    /* Open all three outputs up front: the generators write to them unchecked, so a failed open must stop us here.  */
+    if ((headerfile = fopen ("cputbl.h", "wb")) == NULL || (stblfile = fopen ("cpustbl.c", "wb")) == NULL
+	|| freopen ("cpuemu.c", "wb", stdout) == NULL) {
+	perror ("gencpu: cannot create output file");	/* errno still belongs to the call that just failed */
+	return 1;
+    }
+
+    generate_includes (stdout);
+    generate_includes (stblfile);
+    generate_func ();
+    free (table68k);
+    return 0;
+}
diff -urN src-0.8.22/src/include/memory.h src-0.8.22-mmu/src/include/memory.h
--- src-0.8.22/src/include/memory.h	2001-11-19 18:52:21.000000000 +0100
+++ src-0.8.22-mmu/src/include/memory.h	2003-07-25 12:25:56.000000000 +0200
@@ -149,6 +149,7 @@
 
 #endif
 
+#if 0
 STATIC_INLINE uae_u32 get_long(uaecptr addr)
 {
     return longget_1(addr);
@@ -173,13 +174,14 @@
 {
     byteput_1(addr, b);
 }
+#endif
 
-STATIC_INLINE uae_u8 *get_real_address(uaecptr addr)
+STATIC_INLINE uae_u8 *phys_get_real_address(uaecptr addr)
 {
     return get_mem_bank(addr).xlateaddr(addr);
 }
 
-STATIC_INLINE int valid_address(uaecptr addr, uae_u32 size)
+STATIC_INLINE int phys_valid_address(uaecptr addr, uae_u32 size)
 {
     return get_mem_bank(addr).check(addr, size);
 }
diff -urN src-0.8.22/src/include/memory.h~ src-0.8.22-mmu/src/include/memory.h~
--- src-0.8.22/src/include/memory.h~	1970-01-01 01:00:00.000000000 +0100
+++ src-0.8.22-mmu/src/include/memory.h~	2003-07-25 12:11:11.000000000 +0200
@@ -0,0 +1,216 @@
+ /*
+  * UAE - The Un*x Amiga Emulator
+  *
+  * memory management
+  *
+  * Copyright 1995 Bernd Schmidt
+  * vim:ts=8:sw=4:
+  */
+
+extern void memory_reset (void);
+
+extern int special_mem;
+#define S_READ 1
+#define S_WRITE 2
+
+typedef uae_u32 (*mem_get_func)(uaecptr) REGPARAM;
+typedef void (*mem_put_func)(uaecptr, uae_u32) REGPARAM;
+typedef uae_u8 *(*xlate_func)(uaecptr) REGPARAM;
+typedef int (*check_func)(uaecptr, uae_u32) REGPARAM;
+
+extern char *address_space, *good_address_map;
+extern uae_u8 *chipmemory;
+
+extern uae_u32 allocated_chipmem;
+extern uae_u32 allocated_fastmem;
+extern uae_u32 allocated_bogomem;
+extern uae_u32 allocated_gfxmem;
+extern uae_u32 allocated_z3fastmem;
+extern uae_u32 allocated_a3000mem;
+
+#undef DIRECT_MEMFUNCS_SUCCESSFUL
+#include "machdep/maccess.h"
+
+#ifndef CAN_MAP_MEMORY
+#undef USE_COMPILER
+#endif
+
+#if defined(USE_COMPILER) && !defined(USE_MAPPED_MEMORY)
+#define USE_MAPPED_MEMORY
+#endif
+
+#define kickmem_size 0x080000
+
+#define chipmem_start 0x00000000
+#define bogomem_start 0x00C00000
+#define a3000mem_start 0x07000000
+#define kickmem_start 0x00F80000
+
+extern int ersatzkickfile;
+extern int cloanto_rom;
+
+extern uae_u8* baseaddr[];
+
+typedef struct {
+    /* These ones should be self-explanatory... */
+    mem_get_func lget, wget, bget;
+    mem_put_func lput, wput, bput;
+    /* Use xlateaddr to translate an Amiga address to a uae_u8 * that can
+     * be used to address memory without calling the wget/wput functions.
+     * This doesn't work for all memory banks, so this function may call
+     * abort(). */
+    xlate_func xlateaddr;
+    /* To prevent calls to abort(), use check before calling xlateaddr.
+     * It checks not only that the memory bank can do xlateaddr, but also
+     * that the pointer points to an area of at least the specified size.
+     * This is used for example to translate bitplane pointers in custom.c */
+    check_func check;
+    /* For those banks that refer to real memory, we can save the whole trouble
+       of going through function calls, and instead simply grab the memory
+       ourselves. This holds the memory address where the start of memory is
+       for this particular bank. */
+    uae_u8 *baseaddr;
+} addrbank;
+
+extern uae_u8 *filesysory;
+extern uae_u8 *rtarea;
+
+extern addrbank chipmem_bank;
+extern addrbank kickmem_bank;
+extern addrbank custom_bank;
+extern addrbank clock_bank;
+extern addrbank cia_bank;
+extern addrbank rtarea_bank;
+extern addrbank expamem_bank;
+extern addrbank fastmem_bank;
+extern addrbank gfxmem_bank;
+
+extern void rtarea_init (void);
+extern void rtarea_setup (void);
+extern void expamem_init (void);
+extern void expamem_reset (void);
+
+extern uae_u32 gfxmem_start;
+extern uae_u8 *gfxmemory;
+extern uae_u32 gfxmem_mask;
+extern int address_space_24;
+
+/* Default memory access functions */
+
+extern int default_check(uaecptr addr, uae_u32 size) REGPARAM;
+extern uae_u8 *default_xlate(uaecptr addr) REGPARAM;
+
+#define bankindex(addr) (((uaecptr)(addr)) >> 16)
+
+extern addrbank *mem_banks[65536];
+extern uae_u8 *baseaddr[65536];
+#define get_mem_bank(addr) (*mem_banks[bankindex(addr)])
+#define put_mem_bank(addr, b, realstart) do { /* install bank b for addr's 64K slot and refresh baseaddr[] */ \
+    (mem_banks[bankindex(addr)] = (b)); \
+    if ((b)->baseaddr) \
+        baseaddr[bankindex(addr)] = (b)->baseaddr - (realstart); \
+    else /* no host base: store the bank pointer plus one instead; b is parenthesized like every other use */ \
+        baseaddr[bankindex(addr)] = (uae_u8*)(((long)(b))+1); \
+} while (0)
+
+extern void memory_init (void);
+extern void memory_cleanup (void);
+extern void map_banks (addrbank *bank, int first, int count, int realsize);
+
+#ifndef NO_INLINE_MEMORY_ACCESS
+
+#define longget(addr) (call_mem_get_func(get_mem_bank(addr).lget, addr))
+#define wordget(addr) (call_mem_get_func(get_mem_bank(addr).wget, addr))
+#define byteget(addr) (call_mem_get_func(get_mem_bank(addr).bget, addr))
+#define longput(addr,l) (call_mem_put_func(get_mem_bank(addr).lput, addr, l))
+#define wordput(addr,w) (call_mem_put_func(get_mem_bank(addr).wput, addr, w))
+#define byteput(addr,b) (call_mem_put_func(get_mem_bank(addr).bput, addr, b))
+
+#else
+
+extern uae_u32 alongget(uaecptr addr);
+extern uae_u32 awordget(uaecptr addr);
+extern uae_u32 longget(uaecptr addr);
+extern uae_u32 wordget(uaecptr addr);
+extern uae_u32 byteget(uaecptr addr);
+extern void longput(uaecptr addr, uae_u32 l);
+extern void wordput(uaecptr addr, uae_u32 w);
+extern void byteput(uaecptr addr, uae_u32 b);
+
+#endif
+
+
+#ifndef MD_HAVE_MEM_1_FUNCS
+
+#define longget_1 longget
+#define wordget_1 wordget
+#define byteget_1 byteget
+#define longput_1 longput
+#define wordput_1 wordput
+#define byteput_1 byteput
+
+#endif
+
+#if 0
+STATIC_INLINE uae_u32 get_long(uaecptr addr)
+{
+    return longget_1(addr);
+}
+STATIC_INLINE uae_u32 get_word(uaecptr addr)
+{
+    return wordget_1(addr);
+}
+STATIC_INLINE uae_u32 get_byte(uaecptr addr)
+{
+    return byteget_1(addr);
+}
+STATIC_INLINE void put_long(uaecptr addr, uae_u32 l)
+{
+    longput_1(addr, l);
+}
+STATIC_INLINE void put_word(uaecptr addr, uae_u32 w)
+{
+    wordput_1(addr, w);
+}
+STATIC_INLINE void put_byte(uaecptr addr, uae_u32 b)
+{
+    byteput_1(addr, b);
+}
+#endif
+/* Return the host pointer for ADDR by calling xlateaddr on the bank get_mem_bank selects; per the addrbank notes, xlateaddr may abort().  */
+STATIC_INLINE uae_u8 *phys_get_real_address(uaecptr addr)
+{
+    return get_mem_bank(addr).xlateaddr(addr);
+}
+/* Ask ADDR's bank, through its check hook, whether SIZE bytes at ADDR can be translated; call this before phys_get_real_address.  */
+STATIC_INLINE int phys_valid_address(uaecptr addr, uae_u32 size)
+{
+    return get_mem_bank(addr).check(addr, size);
+}
+
+
+/* For faster access in custom chip emulation.  */
+extern uae_u32 chipmem_lget (uaecptr) REGPARAM;
+extern uae_u32 chipmem_wget (uaecptr) REGPARAM;
+extern uae_u32 chipmem_bget (uaecptr) REGPARAM;
+extern void chipmem_lput (uaecptr, uae_u32) REGPARAM;
+extern void chipmem_wput (uaecptr, uae_u32) REGPARAM;
+extern void chipmem_bput (uaecptr, uae_u32) REGPARAM;
+
+#ifdef NATMEM_OFFSET
+
+typedef struct shmpiece_reg {
+    uae_u8 *native_address;
+    int id;
+    uae_u32 size;
+    struct shmpiece_reg *next;
+    struct shmpiece_reg *prev;
+} shmpiece;
+
+extern shmpiece *shm_start;
+extern int canbang;
+
+#endif
+
+extern uae_u8 *mapped_malloc (size_t, char *);
+extern void mapped_free (uae_u8 *);
diff -urN src-0.8.22/src/include/mmu.h src-0.8.22-mmu/src/include/mmu.h
--- src-0.8.22/src/include/mmu.h	1970-01-01 01:00:00.000000000 +0100
+++ src-0.8.22-mmu/src/include/mmu.h	2003-07-25 12:38:43.000000000 +0200
@@ -0,0 +1,245 @@
+
+#define MMU_TEST_PTEST		1
+#define MMU_TEST_VERBOSE	2
+#define MMU_TEST_FORCE_TABLE_SEARCH	4
+#define MMU_TEST_NO_BUSERR	8
+
+#define HAVE_MMU	(currprefs.cpu_level == 4)
+
+extern void mmu_dump_tables(void);
+
+#define MMU_PAGE_8KB	1
+#define MMU_PAGE_4KB	0
+
+#define MMU_TTR_LOGICAL_BASE				0xff000000
+#define MMU_TTR_LOGICAL_MASK				0x00ff0000
+#define MMU_TTR_BIT_ENABLED					(1 << 15)
+#define MMU_TTR_BIT_SFIELD_ENABLED			(1 << 14)
+#define MMU_TTR_BIT_SFIELD_SUPER			(1 << 13)
+#define MMU_TTR_SFIELD_SHIFT				13
+#define MMU_TTR_UX_MASK						((1 << 9) | (1 << 8))
+#define MMU_TTR_UX_SHIFT					8
+#define MMU_TTR_CACHE_MASK				((1 << 6) | (1 << 5))
+#define MMU_TTR_CACHE_SHIFT						5
+#define MMU_TTR_BIT_WRITE_PROTECT				(1 << 2)
+
+#define MMU_UDT_MASK	3
+#define MMU_PDT_MASK	3
+
+#define MMU_DES_WP			4
+#define MMU_DES_USED		8
+
+/* page descriptors only */
+#define MMU_DES_MODIFIED	16
+#define MMU_DES_SUPER		(1 << 7)
+#define MMU_DES_GLOBAL		(1 << 10)
+
+#define MMU_ROOT_PTR_ADDR_MASK				0xfffffe00
+#define MMU_PTR_PAGE_ADDR_MASK_8			0xffffff80
+#define MMU_PTR_PAGE_ADDR_MASK_4			0xffffff00
+
+#define MMU_PAGE_INDIRECT_MASK				0xfffffffc
+#define MMU_PAGE_ADDR_MASK_8				0xffffe000
+#define MMU_PAGE_ADDR_MASK_4				0xfffff000
+#define MMU_PAGE_UR_MASK_8					((1 << 12) | (1 << 11))
+#define MMU_PAGE_UR_MASK_4					(1 << 11)
+#define MMU_PAGE_UR_SHIFT					11
+
+#define MMU_MMUSR_ADDR_MASK	0xfffff000
+#define MMU_MMUSR_B			(1 << 11)
+#define MMU_MMUSR_G			(1 << 10)
+#define MMU_MMUSR_U1		(1 << 9)
+#define MMU_MMUSR_U0		(1 << 8)
+#define MMU_MMUSR_S			(1 << 7)
+#define MMU_MMUSR_CM		((1 << 6) | (1 << 5))	/* outer parens keep ~ and & binding to the whole mask */
+#define MMU_MMUSR_M			(1 << 4)
+#define MMU_MMUSR_W			(1 << 2)
+#define MMU_MMUSR_R			(1 << 1)
+#define MMU_MMUSR_T			(1 << 0)
+
+struct mmu_atc_line	{
+	int	v, umode, g, s, cm, m, w, r, fc2;
+	uaecptr phys, log;
+};
+
+extern struct mmu_atc_line atc[64];
+
+#define TTR_I0	4
+#define TTR_I1	5
+#define TTR_D0	6
+#define TTR_D1	7
+
+#define TTR_NO_MATCH	0
+#define TTR_NO_WRITE	1
+#define TTR_OK_MATCH	2
+
+/* Latch a new translation-control value into regs.tc.  When cpu_level >= 4
+ * the enable flag (bit 15) and the page size (bit 14) are also decoded into
+ * regs.mmu_enabled / regs.mmu_pagesize and the result is logged. */
+STATIC_INLINE void mmu_set_tc(uae_u16 tc)
+{
+	regs.tc = tc;
+
+	if (currprefs.cpu_level >= 4)
+	{
+		/* disabled debugging aid: disassemble at the PC when bit 15 gets set */
+#if 0
+		if (tc & 0x8000)
+		{
+			uaecptr nextpc;
+			m68k_disasm(stdout, m68k_getpc(), &nextpc, 10);
+		}
+#endif
+
+		regs.mmu_enabled = tc & 0x8000 ? 1 : 0;
+		regs.mmu_pagesize = tc & 0x4000 ? MMU_PAGE_8KB : MMU_PAGE_4KB;
+
+		write_log("MMU: enabled=%d page=%d\n", regs.mmu_enabled, regs.mmu_pagesize);
+	}
+}
+
+extern void mmu_make_transparent_region(uaecptr baseaddr, uae_u32 size, int datamode);
+
+/* Write val into one of the four transparent translation registers
+ * (regs.itt0 / itt1 / dtt0 / dtt1), picked by regno = TTR_I0 .. TTR_D1. */
+STATIC_INLINE void mmu_set_ttr(int regno, uae_u32 val)
+{
+	switch (regno) {
+	case TTR_I0: regs.itt0 = val; break;
+	case TTR_I1: regs.itt1 = val; break;
+	case TTR_D0: regs.dtt0 = val; break;
+	case TTR_D1: regs.dtt1 = val; break;
+	default: abort();	/* any other regno terminates the emulator */
+	}
+}
+
+STATIC_INLINE void mmu_set_mmusr(uae_u32 val)
+{
+	regs.mmusr = val;
+}
+
+/* Store val in regs.urp (regno 0x806) or regs.srp (regno 0x807);
+ * any other regno aborts. */
+STATIC_INLINE void mmu_set_root_pointer(int regno, uae_u32 val)
+{
+	switch (regno) {
+	case 0x806: regs.urp = val; break;
+	case 0x807: regs.srp = val; break;
+	default: abort();
+	}
+}
+
+
+/* MMU related stuff */
+
+#if 0 /* later, for speedup */
+extern uae_u32 (*log_get_long)(uaecptr addr);
+extern void (*log_put_long)(uaecptr addr, uae_u32 l);
+extern uae_u16 (*log_get_word)(uaecptr addr);
+extern void (*log_put_word)(uaecptr addr, uae_u16 w);
+extern uae_u8 (*log_get_byte)(uaecptr addr);
+extern void (*log_put_byte)(uaecptr addr, uae_u8 b);
+#endif
+
+#define phys_get_long(addr)	longget_1(addr)
+#define phys_get_word(addr)	wordget_1(addr)
+#define phys_get_byte(addr)	byteget_1(addr)
+#define phys_put_long(addr,l)	longput_1(addr,l)
+#define phys_put_word(addr,w)	wordput_1(addr,w)
+#define phys_put_byte(addr,b)	byteput_1(addr,b)
+
+STATIC_INLINE uae_u32 get_long(uaecptr addr)
+{	/* Long read; with HAVE_MMU addr goes through mmu_translate (fc FC_DATA) first. */
+	if (HAVE_MMU)	/* translate once: phys_get_long may be a macro reusing its argument */
+		addr = mmu_translate(addr, FC_DATA, 0, m68k_getpc(), sz_long, 0);
+	return phys_get_long(addr);
+}
+STATIC_INLINE uae_u16 get_word(uaecptr addr)
+{	/* Word read; with HAVE_MMU addr goes through mmu_translate (fc FC_DATA) first. */
+	if (HAVE_MMU)	/* translate once: phys_get_word may be a macro reusing its argument */
+		addr = mmu_translate(addr, FC_DATA, 0, m68k_getpc(), sz_word, 0);
+	return phys_get_word(addr);
+}
+STATIC_INLINE uae_u8 get_byte(uaecptr addr)
+{	/* Byte read; with HAVE_MMU addr goes through mmu_translate (fc FC_DATA) first. */
+	if (HAVE_MMU)	/* translate once: phys_get_byte may be a macro reusing its argument */
+		addr = mmu_translate(addr, FC_DATA, 0, m68k_getpc(), sz_byte, 0);
+	return phys_get_byte(addr);
+}
+
+/* Long write; with HAVE_MMU addr goes through mmu_translate (fc FC_DATA, write=1) first. */
+STATIC_INLINE void put_long(uaecptr addr, uae_u32 l)
+{
+	if (HAVE_MMU)	/* translate once: phys_put_long may be a macro reusing its argument */
+		addr = mmu_translate(addr, FC_DATA, 1, m68k_getpc(), sz_long, 0);
+	phys_put_long(addr, l);
+}
+/* Word write; with HAVE_MMU addr goes through mmu_translate (fc FC_DATA, write=1) first. */
+STATIC_INLINE void put_word(uaecptr addr, uae_u16 w)
+{
+	if (HAVE_MMU)	/* translate once: phys_put_word may be a macro reusing its argument */
+		addr = mmu_translate(addr, FC_DATA, 1, m68k_getpc(), sz_word, 0);
+	phys_put_word(addr, w);
+}
+/* Byte write; with HAVE_MMU addr goes through mmu_translate (fc FC_DATA, write=1) first. */
+STATIC_INLINE void put_byte(uaecptr addr, uae_u16 b)
+{
+	if (HAVE_MMU)	/* translate once: phys_put_byte may be a macro reusing its argument */
+		addr = mmu_translate(addr, FC_DATA, 1, m68k_getpc(), sz_byte, 0);
+	phys_put_byte(addr, b);
+}
+
+STATIC_INLINE uae_u8 *get_real_address(uaecptr addr)
+{
+    return phys_get_real_address(HAVE_MMU ? mmu_translate(addr, FC_DATA, 0, 0, sz_byte, 0) : addr);
+}
+
+STATIC_INLINE int valid_address(uaecptr addr, uae_u32 size)
+{
+    return phys_valid_address(HAVE_MMU ? mmu_translate(addr, FC_DATA, 0, 0, sz_byte, 0) : addr, size);
+}
+
+
+STATIC_INLINE uae_u32 sfc_get_long(uaecptr addr)
+{	/* Long read; with HAVE_MMU addr is translated using regs.sfc as function code. */
+	if (HAVE_MMU)	/* translate once: phys_get_long may be a macro reusing its argument */
+		addr = mmu_translate(addr, regs.sfc, 0, m68k_getpc(), sz_long, 0);
+	return phys_get_long(addr);
+}
+STATIC_INLINE uae_u16 sfc_get_word(uaecptr addr)
+{	/* Word read; with HAVE_MMU addr is translated using regs.sfc as function code. */
+	if (HAVE_MMU)	/* translate once: phys_get_word may be a macro reusing its argument */
+		addr = mmu_translate(addr, regs.sfc, 0, m68k_getpc(), sz_word, 0);
+	return phys_get_word(addr);
+}
+STATIC_INLINE uae_u8 sfc_get_byte(uaecptr addr)
+{	/* Byte read; with HAVE_MMU addr is translated using regs.sfc as function code. */
+	if (HAVE_MMU)	/* translate once: phys_get_byte may be a macro reusing its argument */
+		addr = mmu_translate(addr, regs.sfc, 0, m68k_getpc(), sz_byte, 0);
+	return phys_get_byte(addr);
+}
+
+
+/* Long write; with HAVE_MMU addr is translated using regs.dfc as function code. */
+STATIC_INLINE void dfc_put_long(uaecptr addr, uae_u32 l)
+{
+	if (HAVE_MMU)	/* translate once: phys_put_long may be a macro reusing its argument */
+		addr = mmu_translate(addr, regs.dfc, 1, m68k_getpc(), sz_long, 0);
+	phys_put_long(addr, l);
+}
+/* Word write; with HAVE_MMU addr is translated using regs.dfc as function code. */
+STATIC_INLINE void dfc_put_word(uaecptr addr, uae_u16 w)
+{
+	if (HAVE_MMU)	/* translate once: phys_put_word may be a macro reusing its argument */
+		addr = mmu_translate(addr, regs.dfc, 1, m68k_getpc(), sz_word, 0);
+	phys_put_word(addr, w);
+}
+/* Byte write; with HAVE_MMU addr is translated using regs.dfc as function code. */
+STATIC_INLINE void dfc_put_byte(uaecptr addr, uae_u16 b)
+{
+	if (HAVE_MMU)	/* translate once: phys_put_byte may be a macro reusing its argument */
+		addr = mmu_translate(addr, regs.dfc, 1, m68k_getpc(), sz_byte, 0);
+	phys_put_byte(addr, b);
+}
+
+
diff -urN src-0.8.22/src/include/newcpu.h src-0.8.22-mmu/src/include/newcpu.h
--- src-0.8.22/src/include/newcpu.h	2001-12-17 19:38:38.000000000 +0100
+++ src-0.8.22-mmu/src/include/newcpu.h	2003-07-25 12:29:03.000000000 +0200
@@ -8,6 +8,7 @@
 
 #include "readcpu.h"
 #include "machdep/m68k.h"
+#include <setjmp.h>
 
 #ifndef SET_CFLG
 
@@ -99,6 +100,12 @@
 
     uae_u32 prefetch_pc;
     uae_u32 prefetch;
+
+    uae_u32 caar, cacr, itt0, itt1, dtt0, dtt1, tc, mmusr, urp, srp;
+
+    int mmu_enabled, mmu_pagesize;
+    uae_u32 mmu_fslw, mmu_fault_addr;
+    uae_u16 mmu_ssw;
 } regs, lastint_regs;
 
 STATIC_INLINE void set_special (uae_u32 x)
@@ -114,6 +121,37 @@
 #define m68k_dreg(r,num) ((r).regs[(num)])
 #define m68k_areg(r,num) (((r).regs + 8)[(num)])
 
+STATIC_INLINE uaecptr m68k_getpc (void)
+{
+    return regs.pc + ((char *)regs.pc_p - (char *)regs.pc_oldp);
+}
+
+STATIC_INLINE uaecptr m68k_getpc_p (uae_u8 *p)
+{
+    return regs.pc + ((char *)p - (char *)regs.pc_oldp);
+}
+
+
+extern void Exception (int, uaecptr);
+extern jmp_buf m68k_exception;
+extern int in_exception_2;
+extern void m68k_dumpstate (FILE *, uaecptr *);
+extern void m68k_disasm (FILE *, uaecptr, uaecptr *, int);
+
+/* function codes for mmu_translation */
+
+#define FC_DATA	(regs.s ? 5 : 1)	/* parenthesised so the ?: cannot absorb neighbouring operators */
+#define FC_INST	(regs.s ? 6 : 2)	/* the value pairs are selected by regs.s */
+
+extern uaecptr mmu_translate(uaecptr addr,
+		int fc,
+		int write,
+		uaecptr pc,
+		int size,	/* sz_xxx */
+		int test
+		) REGPARAM;
+#include "mmu.h"
+
 #if !defined USE_COMPILER
 STATIC_INLINE void m68k_setpc (uaecptr newpc)
 {
@@ -124,19 +162,64 @@
 extern void m68k_setpc (uaecptr newpc);
 #endif
 
-STATIC_INLINE uaecptr m68k_getpc (void)
+
+#if 0
+#define get_ibyte(o) do_get_mem_byte((uae_u8 *)(regs.pc_p + (o) + 1))
+#define get_iword(o) do_get_mem_word((uae_u16 *)(regs.pc_p + (o)))
+#define get_ilong(o) do_get_mem_long((uae_u32 *)(regs.pc_p + (o)))
+#endif
+
+STATIC_INLINE uae_u8 get_ibyte(uae_u32 o)
 {
-    return regs.pc + ((char *)regs.pc_p - (char *)regs.pc_oldp);
+    if (HAVE_MMU) {
+        uaecptr addr = m68k_getpc() + o + 1;
+	return phys_get_byte(mmu_translate(addr, FC_INST, 0, addr, sz_byte, 0));
+    }
+    return do_get_mem_byte((uae_u8 *)(regs.pc_p + (o) + 1));
+}
+STATIC_INLINE uae_u16 get_iword(uae_u32 o)
+{
+    if (HAVE_MMU) {
+        uaecptr addr = m68k_getpc() + o;
+	return phys_get_word(mmu_translate(addr, FC_INST, 0, addr, sz_word, 0));
+    }
+    return do_get_mem_word((uae_u16 *)(regs.pc_p + (o)));
+}
+STATIC_INLINE uae_u32 get_ilong(uae_u32 o)
+{
+    if (HAVE_MMU) {
+        uaecptr addr = m68k_getpc() + o;
+	return phys_get_long(mmu_translate(addr, FC_INST, 0, addr, sz_long, 0));
+    }
+    return do_get_mem_long((uae_u32 *)(regs.pc_p + (o)));
 }
 
-STATIC_INLINE uaecptr m68k_getpc_p (uae_u8 *p)
+
+STATIC_INLINE uae_u8 get_ibyte_1(uae_u32 o)
 {
-    return regs.pc + ((char *)p - (char *)regs.pc_oldp);
+    if (HAVE_MMU) {
+        uaecptr addr = m68k_getpc() + o + 1;
+	return phys_get_byte(mmu_translate(addr, FC_INST, 0, addr, sz_byte, 0));
+    }
+    return byteget_1(regs.pc + (regs.pc_p - regs.pc_oldp) + (o) + 1);
+}
+STATIC_INLINE uae_u16 get_iword_1(uae_u32 o)
+{
+    if (HAVE_MMU) {
+        uaecptr addr = m68k_getpc() + o;
+	return phys_get_word(mmu_translate(addr, FC_INST, 0, addr, sz_word, 0));
+    }
+    return wordget_1(regs.pc + (regs.pc_p - regs.pc_oldp) + (o));
 }
 
-#define get_ibyte(o) do_get_mem_byte((uae_u8 *)(regs.pc_p + (o) + 1))
-#define get_iword(o) do_get_mem_word((uae_u16 *)(regs.pc_p + (o)))
-#define get_ilong(o) do_get_mem_long((uae_u32 *)(regs.pc_p + (o)))
+STATIC_INLINE uae_u32 get_ilong_1(uae_u32 o)
+{
+    if (HAVE_MMU) {
+        uaecptr addr = m68k_getpc() + o;
+	return phys_get_long(mmu_translate(addr, FC_INST, 0, addr, sz_long, 0));
+    }
+    return longget_1(regs.pc + (regs.pc_p - regs.pc_oldp) + (o));
+}
 
 STATIC_INLINE void refill_prefetch (uae_u32 currpc, uae_u32 offs)
 {
@@ -144,6 +227,16 @@
     uae_s32 pc_p_offs = t - currpc;
     uae_u8 *ptr = regs.pc_p + pc_p_offs;
     uae_u32 r;
+
+    regs.prefetch_pc = t;
+
+    if (HAVE_MMU)	{
+        t = mmu_translate(t, FC_INST, 0, t, sz_long, 0);
+	r = phys_get_long(t);
+	do_put_mem_long(&regs.prefetch, r);
+	return;
+    }
+
 #ifdef UNALIGNED_PROFITABLE
     r = *(uae_u32 *)ptr;
     regs.prefetch = r;
@@ -152,7 +245,6 @@
     do_put_mem_long (&regs.prefetch, r);
 #endif
     /* printf ("PC %lx T %lx PCPOFFS %d R %lx\n", currpc, t, pc_p_offs, r); */
-    regs.prefetch_pc = t;
 }
 
 STATIC_INLINE uae_u32 get_ibyte_prefetch (uae_s32 o)
@@ -184,7 +276,7 @@
     v = do_get_mem_word ((uae_u16 *)(((uae_u8 *)&regs.prefetch) + offs));
     if (offs >= 2)
 	refill_prefetch (currpc, 4);
-    /* printf ("get_iword PC %lx ADDR %lx OFFS %lx V %lx\n", currpc, addr, offs, v); */
+/*    printf ("get_iword_prefetch PC %lx ADDR %lx OFFS %lx V %lx\n", currpc, addr, offs, v); */
     return v;
 }
 STATIC_INLINE uae_u32 get_ilong_prefetch (uae_s32 o)
@@ -252,7 +344,6 @@
 
 extern void MakeSR (void);
 extern void MakeFromSR (void);
-extern void Exception (int, uaecptr);
 extern void dump_counts (void);
 extern int m68k_move2c (int, uae_u32 *);
 extern int m68k_movec2 (int, uae_u32 *);
@@ -260,8 +351,6 @@
 extern void m68k_mull (uae_u32, uae_u32, uae_u16);
 extern void init_m68k (void);
 extern void m68k_go (int);
-extern void m68k_dumpstate (FILE *, uaecptr *);
-extern void m68k_disasm (FILE *, uaecptr, uaecptr *, int);
 extern void m68k_reset (void);
 
 extern void mmu_op (uae_u32, uae_u16);
diff -urN src-0.8.22/src/include/newcpu.h~ src-0.8.22-mmu/src/include/newcpu.h~
--- src-0.8.22/src/include/newcpu.h~	1970-01-01 01:00:00.000000000 +0100
+++ src-0.8.22-mmu/src/include/newcpu.h~	2003-07-25 12:11:11.000000000 +0200
@@ -0,0 +1,399 @@
+ /*
+  * UAE - The Un*x Amiga Emulator
+  *
+  * MC68000 emulation
+  *
+  * Copyright 1995 Bernd Schmidt
+  */
+
+#include "readcpu.h"
+#include "machdep/m68k.h"
+#include <setjmp.h>
+
+#ifndef SET_CFLG
+
+#define SET_CFLG(x) (CFLG = (x))
+#define SET_NFLG(x) (NFLG = (x))
+#define SET_VFLG(x) (VFLG = (x))
+#define SET_ZFLG(x) (ZFLG = (x))
+#define SET_XFLG(x) (XFLG = (x))
+
+#define GET_CFLG CFLG
+#define GET_NFLG NFLG
+#define GET_VFLG VFLG
+#define GET_ZFLG ZFLG
+#define GET_XFLG XFLG
+
+#define CLEAR_CZNV do { \
+ SET_CFLG (0); \
+ SET_ZFLG (0); \
+ SET_NFLG (0); \
+ SET_VFLG (0); \
+} while (0)
+
+#define COPY_CARRY (SET_XFLG (GET_CFLG))
+#endif
+
+extern int areg_byteinc[];
+extern int imm8_table[];
+
+extern int movem_index1[256];
+extern int movem_index2[256];
+extern int movem_next[256];
+
+extern int fpp_movem_index1[256];
+extern int fpp_movem_index2[256];
+extern int fpp_movem_next[256];
+
+extern int broken_in;
+
+typedef unsigned long cpuop_func (uae_u32) REGPARAM;
+
+struct cputbl {
+    cpuop_func *handler;
+    int specific;
+    uae_u16 opcode;
+};
+
+extern unsigned long op_illg (uae_u32) REGPARAM;
+
+typedef char flagtype;
+
+/* You can set this to long double to be more accurate. However, the
+   resulting alignment issues will cost a lot of performance in some
+   apps */
+#define USE_LONG_DOUBLE 0
+
+#if USE_LONG_DOUBLE
+typedef long double fptype;
+#else
+typedef double fptype;
+#endif
+
+extern struct regstruct
+{
+    uae_u32 regs[16];
+    uaecptr  usp,isp,msp;
+    uae_u16 sr;
+    flagtype t1;
+    flagtype t0;
+    flagtype s;
+    flagtype m;
+    flagtype x;
+    flagtype stopped;
+    int intmask;
+
+    uae_u32 pc;
+    uae_u8 *pc_p;
+    uae_u8 *pc_oldp;
+
+    uae_u32 vbr,sfc,dfc;
+
+    fptype fp[8];
+    fptype fp_result;
+
+    uae_u32 fpcr,fpsr,fpiar;
+    uae_u32 fpsr_highbyte;
+
+    uae_u32 spcflags;
+    uae_u32 kick_mask;
+
+    uae_u32 prefetch_pc;
+    uae_u32 prefetch;
+
+	uae_u32 caar, cacr, itt0, itt1, dtt0, dtt1, tc, mmusr, urp, srp;
+
+	int mmu_enabled, mmu_pagesize;
+	uae_u32 mmu_fslw, mmu_fault_addr;
+	uae_u16 mmu_ssw;
+} regs, lastint_regs;
+
+STATIC_INLINE void set_special (uae_u32 x)
+{
+    regs.spcflags |= x;
+}
+
+STATIC_INLINE void unset_special (uae_u32 x)
+{
+    regs.spcflags &= ~x;
+}
+
+#define m68k_dreg(r,num) ((r).regs[(num)])
+#define m68k_areg(r,num) (((r).regs + 8)[(num)])
+
+STATIC_INLINE uaecptr m68k_getpc (void)
+{
+    return regs.pc + ((char *)regs.pc_p - (char *)regs.pc_oldp);
+}
+
+STATIC_INLINE uaecptr m68k_getpc_p (uae_u8 *p)
+{
+    return regs.pc + ((char *)p - (char *)regs.pc_oldp);
+}
+
+
+extern void Exception (int, uaecptr);
+extern jmp_buf m68k_exception;
+extern int in_exception_2;
+extern void m68k_dumpstate (FILE *, uaecptr *);
+extern void m68k_disasm (FILE *, uaecptr, uaecptr *, int);
+
+/* function codes for mmu_translation */
+
+#define FC_DATA	regs.s ? 5 : 1
+#define FC_INST regs.s ? 6 : 2
+
+extern uaecptr mmu_translate(uaecptr addr,
+		int fc,
+		int write,
+		uaecptr pc,
+		int size,	/* sz_xxx */
+		int test
+		) REGPARAM;
+#include "mmu.h"
+
+#if !defined USE_COMPILER
+STATIC_INLINE void m68k_setpc (uaecptr newpc)
+{
+    regs.pc_p = regs.pc_oldp = get_real_address (newpc);
+    regs.pc = newpc;
+}
+#else
+extern void m68k_setpc (uaecptr newpc);
+#endif
+
+
+#if 0
+#define get_ibyte(o) do_get_mem_byte((uae_u8 *)(regs.pc_p + (o) + 1))
+#define get_iword(o) do_get_mem_word((uae_u16 *)(regs.pc_p + (o)))
+#define get_ilong(o) do_get_mem_long((uae_u32 *)(regs.pc_p + (o)))
+#endif
+
+STATIC_INLINE uae_u8 get_ibyte(uae_u32 o)
+{
+	if (HAVE_MMU)	{
+		uaecptr addr = m68k_getpc() + o + 1;
+		return phys_get_byte(mmu_translate(addr, FC_INST, 0, addr, sz_byte, 0));
+	}
+	return do_get_mem_byte((uae_u8 *)(regs.pc_p + (o) + 1));
+}
+STATIC_INLINE uae_u16 get_iword(uae_u32 o)
+{
+	if (HAVE_MMU)	{
+		uaecptr addr = m68k_getpc() + o;
+		return phys_get_word(mmu_translate(addr, FC_INST, 0, addr, sz_word, 0));
+	}
+	return do_get_mem_word((uae_u16 *)(regs.pc_p + (o)));
+}
+STATIC_INLINE uae_u32 get_ilong(uae_u32 o)
+{
+	if (HAVE_MMU)	{
+		uaecptr addr = m68k_getpc() + o;
+		return phys_get_long(mmu_translate(addr, FC_INST, 0, addr, sz_long, 0));
+	}
+	return do_get_mem_long((uae_u32 *)(regs.pc_p + (o)));
+}
+
+
+STATIC_INLINE uae_u8 get_ibyte_1(uae_u32 o)
+{
+	if (HAVE_MMU)	{
+		uaecptr addr = m68k_getpc() + o + 1;
+		return phys_get_byte(mmu_translate(addr, FC_INST, 0, addr, sz_byte, 0));
+	}
+	return byteget_1(regs.pc + (regs.pc_p - regs.pc_oldp) + (o) + 1);
+}
+STATIC_INLINE uae_u16 get_iword_1(uae_u32 o)
+{
+	if (HAVE_MMU)	{
+		uaecptr addr = m68k_getpc() + o;
+		return phys_get_word(mmu_translate(addr, FC_INST, 0, addr, sz_word, 0));
+	}
+	return wordget_1(regs.pc + (regs.pc_p - regs.pc_oldp) + (o));
+}
+
+STATIC_INLINE uae_u32 get_ilong_1(uae_u32 o)
+{
+	if (HAVE_MMU)	{
+		uaecptr addr = m68k_getpc() + o;
+		return phys_get_long(mmu_translate(addr, FC_INST, 0, addr, sz_long, 0));
+	}
+	return longget_1(regs.pc + (regs.pc_p - regs.pc_oldp) + (o));
+}
+
+
+
+
+STATIC_INLINE void refill_prefetch (uae_u32 currpc, uae_u32 offs)
+{
+    uae_u32 t = (currpc + offs) & ~3;
+    uae_s32 pc_p_offs = t - currpc;
+    uae_u8 *ptr = regs.pc_p + pc_p_offs;
+    uae_u32 r;
+
+    regs.prefetch_pc = t;
+
+	if (HAVE_MMU)	{
+		t = mmu_translate(t, FC_INST, 0, t, sz_long, 0);
+		r = phys_get_long(t);
+		do_put_mem_long(&regs.prefetch, r);
+		return;
+	}
+	
+#ifdef UNALIGNED_PROFITABLE
+    r = *(uae_u32 *)ptr;
+    regs.prefetch = r;
+#else
+    r = do_get_mem_long ((uae_u32 *)ptr);
+    do_put_mem_long (&regs.prefetch, r);
+#endif
+    /* printf ("PC %lx T %lx PCPOFFS %d R %lx\n", currpc, t, pc_p_offs, r); */
+}
+
+STATIC_INLINE uae_u32 get_ibyte_prefetch (uae_s32 o)
+{
+    uae_u32 currpc = m68k_getpc ();
+    uae_u32 addr = currpc + o + 1;
+    uae_u32 offs = addr - regs.prefetch_pc;
+    uae_u32 v;
+    if (offs > 3) {
+	refill_prefetch (currpc, o + 1);
+	offs = addr - regs.prefetch_pc;
+    }
+    v = do_get_mem_byte (((uae_u8 *)&regs.prefetch) + offs);
+    if (offs >= 2)
+	refill_prefetch (currpc, 4);
+    /* printf ("get_ibyte PC %lx ADDR %lx OFFS %lx V %lx\n", currpc, addr, offs, v); */
+    return v;
+}
+STATIC_INLINE uae_u32 get_iword_prefetch (uae_s32 o)
+{
+    uae_u32 currpc = m68k_getpc ();
+    uae_u32 addr = currpc + o;
+    uae_u32 offs = addr - regs.prefetch_pc;
+    uae_u32 v;
+    if (offs > 3) {
+	refill_prefetch (currpc, o);
+	offs = addr - regs.prefetch_pc;
+    }
+    v = do_get_mem_word ((uae_u16 *)(((uae_u8 *)&regs.prefetch) + offs));
+    if (offs >= 2)
+	refill_prefetch (currpc, 4);
+/*    printf ("get_iword_prefetch PC %lx ADDR %lx OFFS %lx V %lx\n", currpc, addr, offs, v); */
+    return v;
+}
+STATIC_INLINE uae_u32 get_ilong_prefetch (uae_s32 o)
+{
+    uae_u32 v = get_iword_prefetch (o);
+    v <<= 16;
+    v |= get_iword_prefetch (o + 2);
+    return v;
+}
+
+#define m68k_incpc(o) (regs.pc_p += (o))
+
+STATIC_INLINE void fill_prefetch_0 (void)
+{
+}
+
+#define fill_prefetch_2 fill_prefetch_0
+
+/* These are only used by the 68020/68881 code, and therefore don't
+ * need to handle prefetch.  */
+STATIC_INLINE uae_u32 next_ibyte (void)
+{
+    uae_u32 r = get_ibyte (0);
+    m68k_incpc (2);
+    return r;
+}
+
+STATIC_INLINE uae_u32 next_iword (void)
+{
+    uae_u32 r = get_iword (0);
+    m68k_incpc (2);
+    return r;
+}
+
+STATIC_INLINE uae_u32 next_ilong (void)
+{
+    uae_u32 r = get_ilong (0);
+    m68k_incpc (4);
+    return r;
+}
+
+#ifdef USE_COMPILER
+extern void m68k_setpc_fast (uaecptr newpc);
+extern void m68k_setpc_bcc (uaecptr newpc);
+extern void m68k_setpc_rte (uaecptr newpc);
+#else
+#define m68k_setpc_fast m68k_setpc
+#define m68k_setpc_bcc  m68k_setpc
+#define m68k_setpc_rte  m68k_setpc
+#endif
+
+STATIC_INLINE void m68k_setstopped (int stop)
+{
+    regs.stopped = stop;
+    /* A traced STOP instruction drops through immediately without
+       actually stopping.  */
+    if (stop && (regs.spcflags & SPCFLAG_DOTRACE) == 0)
+	regs.spcflags |= SPCFLAG_STOP;
+}
+
+extern uae_u32 get_disp_ea_020 (uae_u32 base, uae_u32 dp);
+extern uae_u32 get_disp_ea_000 (uae_u32 base, uae_u32 dp);
+
+extern uae_s32 ShowEA (FILE *, int reg, amodes mode, wordsizes size, char *buf);
+
+extern void MakeSR (void);
+extern void MakeFromSR (void);
+extern void dump_counts (void);
+extern int m68k_move2c (int, uae_u32 *);
+extern int m68k_movec2 (int, uae_u32 *);
+extern void m68k_divl (uae_u32, uae_u32, uae_u16, uaecptr);
+extern void m68k_mull (uae_u32, uae_u32, uae_u16);
+extern void init_m68k (void);
+extern void m68k_go (int);
+extern void m68k_reset (void);
+
+extern void mmu_op (uae_u32, uae_u16);
+
+extern void fpp_opp (uae_u32, uae_u16);
+extern void fdbcc_opp (uae_u32, uae_u16);
+extern void fscc_opp (uae_u32, uae_u16);
+extern void ftrapcc_opp (uae_u32,uaecptr);
+extern void fbcc_opp (uae_u32, uaecptr, uae_u32);
+extern void fsave_opp (uae_u32);
+extern void frestore_opp (uae_u32);
+
+/* Opcode of faulting instruction */
+extern uae_u16 last_op_for_exception_3;
+/* PC at fault time */
+extern uaecptr last_addr_for_exception_3;
+/* Address that generated the exception */
+extern uaecptr last_fault_for_exception_3;
+
+#define CPU_OP_NAME(a) op ## a
+
+/* 68040 */
+extern struct cputbl op_smalltbl_0_ff[];
+/* 68020 + 68881 */
+extern struct cputbl op_smalltbl_1_ff[];
+/* 68020 */
+extern struct cputbl op_smalltbl_2_ff[];
+/* 68010 */
+extern struct cputbl op_smalltbl_3_ff[];
+/* 68000 */
+extern struct cputbl op_smalltbl_4_ff[];
+/* 68000 slow but compatible.  */
+extern struct cputbl op_smalltbl_5_ff[];
+
+extern cpuop_func *cpufunctbl[65536] ASM_SYM_FOR_FUNC ("cpufunctbl");
+
+#ifdef JIT
+#else
+#define flush_icache(X) do {} while (0)
+#endif
+
+
+
diff -urN src-0.8.22/src/memory.c src-0.8.22-mmu/src/memory.c
--- src-0.8.22/src/memory.c	2002-02-16 15:29:42.000000000 +0100
+++ src-0.8.22-mmu/src/memory.c	2003-07-25 12:11:11.000000000 +0200
@@ -686,9 +686,10 @@
 
 uae_u8 REGPARAM2 *default_xlate (uaecptr a)
 {
-    write_log ("Your Amiga program just did something terribly stupid\n");
-    uae_reset ();
-    return kickmem_xlate (get_long (0xF80000));	/* So we don't crash. */
+    write_log ("Your Amiga program just did something terribly stupid "
+	       "(xlate of %lx)\n", (unsigned long) a);	/* cast matches %lx */
+    Exception(2, 0);	/* vector 2 -- presumably the 68k bus error / access fault */
+    longjmp(m68k_exception, 0);	/* does not return to the caller */
 }
 
 /* Address banks */
diff -urN src-0.8.22/src/memory.c~ src-0.8.22-mmu/src/memory.c~
--- src-0.8.22/src/memory.c~	1970-01-01 01:00:00.000000000 +0100
+++ src-0.8.22-mmu/src/memory.c~	2003-07-25 12:08:14.000000000 +0200
@@ -0,0 +1,1402 @@
+ /*
+  * UAE - The Un*x Amiga Emulator
+  *
+  * Memory management
+  *
+  * (c) 1995 Bernd Schmidt
+  */
+
+#include "sysconfig.h"
+#include "sysdeps.h"
+
+#include "config.h"
+#include "options.h"
+#include "uae.h"
+#include "memory.h"
+#include "ersatz.h"
+#include "zfile.h"
+#include "custom.h"
+#include "events.h"
+#include "newcpu.h"
+#include "autoconf.h"
+#include "savestate.h"
+
+#ifdef USE_MAPPED_MEMORY
+#include <sys/mman.h>
+#endif
+
+/* Set by each memory handler that does not simply access real memory.  */
+int special_mem;
+
+int ersatzkickfile = 0;
+
+uae_u32 allocated_chipmem;
+uae_u32 allocated_fastmem;
+uae_u32 allocated_bogomem;
+uae_u32 allocated_gfxmem;
+uae_u32 allocated_z3fastmem;
+uae_u32 allocated_a3000mem;
+
+static long chip_filepos;
+static long bogo_filepos;
+static long rom_filepos;
+
+addrbank *mem_banks[65536];
+
+/* This has two functions. It either holds a host address that, when added
+   to the 68k address, gives the host address corresponding to that 68k
+   address (in which case the value in this array is even), OR it holds the
+   same value as mem_banks, for those banks that have baseaddr==0. In that
+   case, bit 0 is set (the memory access routines will take care of it).  */
+
+uae_u8 *baseaddr[65536];
+
+#ifdef NO_INLINE_MEMORY_ACCESS
+__inline__ uae_u32 longget (uaecptr addr)
+{
+    return call_mem_get_func (get_mem_bank (addr).lget, addr);
+}
+__inline__ uae_u32 wordget (uaecptr addr)
+{
+    return call_mem_get_func (get_mem_bank (addr).wget, addr);
+}
+__inline__ uae_u32 byteget (uaecptr addr)
+{
+    return call_mem_get_func (get_mem_bank (addr).bget, addr);
+}
+__inline__ void longput (uaecptr addr, uae_u32 l)
+{
+    call_mem_put_func (get_mem_bank (addr).lput, addr, l);
+}
+__inline__ void wordput (uaecptr addr, uae_u32 w)
+{
+    call_mem_put_func (get_mem_bank (addr).wput, addr, w);
+}
+__inline__ void byteput (uaecptr addr, uae_u32 b)
+{
+    call_mem_put_func (get_mem_bank (addr).bput, addr, b);
+}
+#endif
+
+uae_u32 chipmem_mask, kickmem_mask, extendedkickmem_mask, bogomem_mask, a3000mem_mask;
+
+static int illegal_count;
+/* A dummy bank that only contains zeros */
+
+static uae_u32 dummy_lget (uaecptr) REGPARAM;
+static uae_u32 dummy_wget (uaecptr) REGPARAM;
+static uae_u32 dummy_bget (uaecptr) REGPARAM;
+static void dummy_lput (uaecptr, uae_u32) REGPARAM;
+static void dummy_wput (uaecptr, uae_u32) REGPARAM;
+static void dummy_bput (uaecptr, uae_u32) REGPARAM;
+static int dummy_check (uaecptr addr, uae_u32 size) REGPARAM;
+
+uae_u32 REGPARAM2 dummy_lget (uaecptr addr)
+{
+    special_mem |= S_READ;
+    if (currprefs.illegal_mem) {
+	if (illegal_count < 20) {
+	    illegal_count++;
+	    write_log ("Illegal lget at %08lx\n", addr);
+	}
+    }
+
+    return 0xFFFFFFFF;
+}
+
+uae_u32 REGPARAM2 dummy_wget (uaecptr addr)
+{
+    special_mem |= S_READ;
+    if (currprefs.illegal_mem) {
+	if (illegal_count < 20) {
+	    illegal_count++;
+	    write_log ("Illegal wget at %08lx\n", addr);
+	}
+    }
+
+    return 0xFFFF;
+}
+
+uae_u32 REGPARAM2 dummy_bget (uaecptr addr)
+{
+    special_mem |= S_READ;
+    if (currprefs.illegal_mem) {
+	if (illegal_count < 20) {
+	    illegal_count++;
+	    write_log ("Illegal bget at %08lx\n", addr);
+	}
+    }
+
+    return 0xFF;
+}
+
+void REGPARAM2 dummy_lput (uaecptr addr, uae_u32 l)
+{
+    special_mem |= S_WRITE;
+    if (currprefs.illegal_mem) {
+	if (illegal_count < 20) {
+	    illegal_count++;
+	    write_log ("Illegal lput at %08lx\n", addr);
+	}
+    }
+}
+void REGPARAM2 dummy_wput (uaecptr addr, uae_u32 w)
+{
+    special_mem |= S_WRITE;
+    if (currprefs.illegal_mem) {
+	if (illegal_count < 20) {
+	    illegal_count++;
+	    write_log ("Illegal wput at %08lx\n", addr);
+	}
+    }
+}
+void REGPARAM2 dummy_bput (uaecptr addr, uae_u32 b)
+{
+    special_mem |= S_WRITE;
+    if (currprefs.illegal_mem) {
+	if (illegal_count < 20) {
+	    illegal_count++;
+	    write_log ("Illegal bput at %08lx\n", addr);
+	}
+    }
+}
+
+int REGPARAM2 dummy_check (uaecptr addr, uae_u32 size)
+{
+    special_mem |= S_READ;
+    if (currprefs.illegal_mem) {
+	if (illegal_count < 20) {
+	    illegal_count++;
+	    write_log ("Illegal check at %08lx\n", addr);
+	}
+    }
+
+    return 0;
+}
+
+/* A3000 "motherboard resources" bank.  */
+static uae_u32 mbres_lget (uaecptr) REGPARAM;
+static uae_u32 mbres_wget (uaecptr) REGPARAM;
+static uae_u32 mbres_bget (uaecptr) REGPARAM;
+static void mbres_lput (uaecptr, uae_u32) REGPARAM;
+static void mbres_wput (uaecptr, uae_u32) REGPARAM;
+static void mbres_bput (uaecptr, uae_u32) REGPARAM;
+static int mbres_check (uaecptr addr, uae_u32 size) REGPARAM;
+
+static int mbres_val = 0;
+
+uae_u32 REGPARAM2 mbres_lget (uaecptr addr)
+{
+    special_mem |= S_READ;
+    if (currprefs.illegal_mem)
+	write_log ("Illegal lget at %08lx\n", addr);
+
+    return 0;
+}
+
+uae_u32 REGPARAM2 mbres_wget (uaecptr addr)
+{
+    special_mem |= S_READ;
+    if (currprefs.illegal_mem)
+	write_log ("Illegal wget at %08lx\n", addr);
+
+    return 0;
+}
+
+uae_u32 REGPARAM2 mbres_bget (uaecptr addr)
+{
+    special_mem |= S_READ;
+    if (currprefs.illegal_mem)
+	write_log ("Illegal bget at %08lx\n", addr);
+
+    return (addr & 0xFFFF) == 3 ? mbres_val : 0;
+}
+
+void REGPARAM2 mbres_lput (uaecptr addr, uae_u32 l)
+{
+    special_mem |= S_WRITE;
+    if (currprefs.illegal_mem)
+	write_log ("Illegal lput at %08lx\n", addr);
+}
+void REGPARAM2 mbres_wput (uaecptr addr, uae_u32 w)
+{
+    special_mem |= S_WRITE;
+    if (currprefs.illegal_mem)
+	write_log ("Illegal wput at %08lx\n", addr);
+}
+void REGPARAM2 mbres_bput (uaecptr addr, uae_u32 b)
+{
+    special_mem |= S_WRITE;
+    if (currprefs.illegal_mem)
+	write_log ("Illegal bput at %08lx\n", addr);
+
+    if ((addr & 0xFFFF) == 3)
+	mbres_val = b;
+}
+
+int REGPARAM2 mbres_check (uaecptr addr, uae_u32 size)
+{
+    if (currprefs.illegal_mem)
+	write_log ("Illegal check at %08lx\n", addr);
+
+    return 0;
+}
+
+/* Chip memory */
+
+uae_u8 *chipmemory;
+
+static int chipmem_check (uaecptr addr, uae_u32 size) REGPARAM;
+static uae_u8 *chipmem_xlate (uaecptr addr) REGPARAM;
+
+uae_u32 REGPARAM2 chipmem_lget (uaecptr addr)
+{
+    uae_u32 *m;
+
+    addr -= chipmem_start & chipmem_mask;
+    addr &= chipmem_mask;
+    m = (uae_u32 *)(chipmemory + addr);
+    return do_get_mem_long (m);
+}
+
+uae_u32 REGPARAM2 chipmem_wget (uaecptr addr)
+{
+    uae_u16 *m;
+
+    addr -= chipmem_start & chipmem_mask;
+    addr &= chipmem_mask;
+    m = (uae_u16 *)(chipmemory + addr);
+    return do_get_mem_word (m);
+}
+
+uae_u32 REGPARAM2 chipmem_bget (uaecptr addr)
+{
+    addr -= chipmem_start & chipmem_mask;
+    addr &= chipmem_mask;
+    return chipmemory[addr];
+}
+
+void REGPARAM2 chipmem_lput (uaecptr addr, uae_u32 l)
+{
+    uae_u32 *m;
+
+    addr -= chipmem_start & chipmem_mask;
+    addr &= chipmem_mask;
+    m = (uae_u32 *)(chipmemory + addr);
+    do_put_mem_long (m, l);
+}
+
+void REGPARAM2 chipmem_wput (uaecptr addr, uae_u32 w)
+{
+    uae_u16 *m;
+
+    addr -= chipmem_start & chipmem_mask;
+    addr &= chipmem_mask;
+    m = (uae_u16 *)(chipmemory + addr);
+    do_put_mem_word (m, w);
+}
+
+void REGPARAM2 chipmem_bput (uaecptr addr, uae_u32 b)
+{
+    addr -= chipmem_start & chipmem_mask;
+    addr &= chipmem_mask;
+    chipmemory[addr] = b;
+}
+
+int REGPARAM2 chipmem_check (uaecptr addr, uae_u32 size)
+{
+    addr -= chipmem_start & chipmem_mask;
+    addr &= chipmem_mask;
+    return (addr + size) <= allocated_chipmem;
+}
+
+uae_u8 REGPARAM2 *chipmem_xlate (uaecptr addr)
+{
+    addr -= chipmem_start & chipmem_mask;
+    addr &= chipmem_mask;
+    return chipmemory + addr;
+}
+
+/* Slow memory */
+
+static uae_u8 *bogomemory;
+
+static uae_u32 bogomem_lget (uaecptr) REGPARAM;
+static uae_u32 bogomem_wget (uaecptr) REGPARAM;
+static uae_u32 bogomem_bget (uaecptr) REGPARAM;
+static void bogomem_lput (uaecptr, uae_u32) REGPARAM;
+static void bogomem_wput (uaecptr, uae_u32) REGPARAM;
+static void bogomem_bput (uaecptr, uae_u32) REGPARAM;
+static int bogomem_check (uaecptr addr, uae_u32 size) REGPARAM;
+static uae_u8 *bogomem_xlate (uaecptr addr) REGPARAM;
+
+uae_u32 REGPARAM2 bogomem_lget (uaecptr addr)
+{
+    uae_u32 *m;
+    addr -= bogomem_start & bogomem_mask;
+    addr &= bogomem_mask;
+    m = (uae_u32 *)(bogomemory + addr);
+    return do_get_mem_long (m);
+}
+
+uae_u32 REGPARAM2 bogomem_wget (uaecptr addr)
+{
+    uae_u16 *m;
+    addr -= bogomem_start & bogomem_mask;
+    addr &= bogomem_mask;
+    m = (uae_u16 *)(bogomemory + addr);
+    return do_get_mem_word (m);
+}
+
+uae_u32 REGPARAM2 bogomem_bget (uaecptr addr)
+{
+    addr -= bogomem_start & bogomem_mask;
+    addr &= bogomem_mask;
+    return bogomemory[addr];
+}
+
+void REGPARAM2 bogomem_lput (uaecptr addr, uae_u32 l)
+{
+    uae_u32 *m;
+    addr -= bogomem_start & bogomem_mask;
+    addr &= bogomem_mask;
+    m = (uae_u32 *)(bogomemory + addr);
+    do_put_mem_long (m, l);
+}
+
+void REGPARAM2 bogomem_wput (uaecptr addr, uae_u32 w)
+{
+    uae_u16 *m;
+    addr -= bogomem_start & bogomem_mask;
+    addr &= bogomem_mask;
+    m = (uae_u16 *)(bogomemory + addr);
+    do_put_mem_word (m, w);
+}
+
+void REGPARAM2 bogomem_bput (uaecptr addr, uae_u32 b)
+{
+    addr -= bogomem_start & bogomem_mask;
+    addr &= bogomem_mask;
+    bogomemory[addr] = b;
+}
+
+int REGPARAM2 bogomem_check (uaecptr addr, uae_u32 size)
+{
+    /* Nonzero when the folded offset plus size does not exceed allocated_bogomem. */
+    uaecptr offset = (addr - (bogomem_start & bogomem_mask)) & bogomem_mask;
+    return (offset + size) <= allocated_bogomem;
+}
+
+uae_u8 REGPARAM2 *bogomem_xlate (uaecptr addr)
+{
+    /* Host pointer for an Amiga slow-RAM address. */
+    uaecptr offset = (addr - (bogomem_start & bogomem_mask)) & bogomem_mask;
+    return bogomemory + offset;
+}
+
+/* A3000 motherboard fast memory */
+
+static uae_u8 *a3000memory;
+
+static uae_u32 a3000mem_lget (uaecptr) REGPARAM;
+static uae_u32 a3000mem_wget (uaecptr) REGPARAM;
+static uae_u32 a3000mem_bget (uaecptr) REGPARAM;
+static void a3000mem_lput (uaecptr, uae_u32) REGPARAM;
+static void a3000mem_wput (uaecptr, uae_u32) REGPARAM;
+static void a3000mem_bput (uaecptr, uae_u32) REGPARAM;
+static int a3000mem_check (uaecptr addr, uae_u32 size) REGPARAM;
+static uae_u8 *a3000mem_xlate (uaecptr addr) REGPARAM;
+
+uae_u32 REGPARAM2 a3000mem_lget (uaecptr addr)
+{
+    /* Turn the Amiga address into an offset within the a3000memory block. */
+    addr = (addr - (a3000mem_start & a3000mem_mask)) & a3000mem_mask;
+
+    /* The long is read through a uae_u32 pointer into the backing store. */
+    return do_get_mem_long ((uae_u32 *)(a3000memory + addr));
+}
+
+uae_u32 REGPARAM2 a3000mem_wget (uaecptr addr)
+{
+    /* Turn the Amiga address into an offset within the a3000memory block. */
+    addr = (addr - (a3000mem_start & a3000mem_mask)) & a3000mem_mask;
+
+    /* The word is read through a uae_u16 pointer into the backing store. */
+    return do_get_mem_word ((uae_u16 *)(a3000memory + addr));
+}
+
+uae_u32 REGPARAM2 a3000mem_bget (uaecptr addr)
+{
+    /* Byte read straight from the translated offset. */
+    addr = (addr - (a3000mem_start & a3000mem_mask)) & a3000mem_mask;
+    return a3000memory[addr];
+}
+
+void REGPARAM2 a3000mem_lput (uaecptr addr, uae_u32 l)
+{
+    /* Turn the Amiga address into an offset within the a3000memory block. */
+    addr = (addr - (a3000mem_start & a3000mem_mask)) & a3000mem_mask;
+
+    /* Store the long through a uae_u32 pointer into the backing store. */
+    do_put_mem_long ((uae_u32 *)(a3000memory + addr), l);
+}
+
+void REGPARAM2 a3000mem_wput (uaecptr addr, uae_u32 w)
+{
+    /* Turn the Amiga address into an offset within the a3000memory block. */
+    addr = (addr - (a3000mem_start & a3000mem_mask)) & a3000mem_mask;
+
+    /* Store the word through a uae_u16 pointer into the backing store. */
+    do_put_mem_word ((uae_u16 *)(a3000memory + addr), w);
+}
+
+void REGPARAM2 a3000mem_bput (uaecptr addr, uae_u32 b)
+{
+    /* Byte write straight to the translated offset. */
+    addr = (addr - (a3000mem_start & a3000mem_mask)) & a3000mem_mask;
+    a3000memory[addr] = b;
+}
+
+int REGPARAM2 a3000mem_check (uaecptr addr, uae_u32 size)
+{
+    /* Nonzero when the translated offset plus size stays within allocated_a3000mem. */
+    uaecptr rel = (addr - (a3000mem_start & a3000mem_mask)) & a3000mem_mask;
+    return (rel + size) <= allocated_a3000mem;
+}
+
+uae_u8 REGPARAM2 *a3000mem_xlate (uaecptr addr)
+{
+    /* Host pointer for an address inside the a3000memory block. */
+    uaecptr rel = (addr - (a3000mem_start & a3000mem_mask)) & a3000mem_mask;
+    return a3000memory + rel;
+}
+
+/* Kick memory */
+
+uae_u8 *kickmemory;
+
+/*
+ * A1000 kickstart RAM handling
+ *
+ * RESET instruction unhides boot ROM and disables write protection
+ * write access to boot ROM hides boot ROM and enables write protection
+ *
+ */
+static int a1000_kickstart_mode;
+static uae_u8 *a1000_bootrom;
+static void a1000_handle_kickstart (int mode)
+{
+    a1000_kickstart_mode = (mode != 0);
+    if (!a1000_kickstart_mode) {
+	/* Mode 0: copy the upper 256K of kickmemory over the lower 256K. */
+	memcpy (kickmemory, kickmemory + 262144, 262144);
+	return;
+    }
+    memset (kickmemory, 0, 262144);	/* then place the 8K boot ROM at 0 and at 128K */
+    memcpy (kickmemory, a1000_bootrom, 8192);
+    memcpy (kickmemory + 131072, a1000_bootrom, 8192);
+}
+
+static uae_u32 kickmem_lget (uaecptr) REGPARAM;
+static uae_u32 kickmem_wget (uaecptr) REGPARAM;
+static uae_u32 kickmem_bget (uaecptr) REGPARAM;
+static void kickmem_lput (uaecptr, uae_u32) REGPARAM;
+static void kickmem_wput (uaecptr, uae_u32) REGPARAM;
+static void kickmem_bput (uaecptr, uae_u32) REGPARAM;
+static int kickmem_check (uaecptr addr, uae_u32 size) REGPARAM;
+static uae_u8 *kickmem_xlate (uaecptr addr) REGPARAM;
+
+uae_u32 REGPARAM2 kickmem_lget (uaecptr addr)
+{
+    uaecptr pos;
+
+    /* Offset of the access inside the kickmemory buffer. */
+    pos = (addr - (kickmem_start & kickmem_mask)) & kickmem_mask;
+    return do_get_mem_long ((uae_u32 *)(kickmemory + pos));
+}
+
+uae_u32 REGPARAM2 kickmem_wget (uaecptr addr)
+{
+    uaecptr pos;
+
+    /* Offset of the access inside the kickmemory buffer. */
+    pos = (addr - (kickmem_start & kickmem_mask)) & kickmem_mask;
+    return do_get_mem_word ((uae_u16 *)(kickmemory + pos));
+}
+
+uae_u32 REGPARAM2 kickmem_bget (uaecptr addr)
+{
+    /* Byte read from the kickmemory buffer at the masked offset. */
+    uaecptr pos = (addr - (kickmem_start & kickmem_mask)) & kickmem_mask;
+    return kickmemory[pos];
+}
+
+void REGPARAM2 kickmem_lput (uaecptr addr, uae_u32 b)
+{
+    /* In A1000 kickstart mode, writes at or above 0xfc0000 are stored in
+     * kickmemory; a write below that calls a1000_handle_kickstart (0).
+     * Otherwise the write is dropped and, if requested, logged. */
+    if (a1000_kickstart_mode) {
+	if (addr < 0xfc0000) {
+	    a1000_handle_kickstart (0);
+	    return;
+	}
+	addr = (addr - (kickmem_start & kickmem_mask)) & kickmem_mask;
+	do_put_mem_long ((uae_u32 *)(kickmemory + addr), b);
+    } else if (currprefs.illegal_mem)	/* cast: %lx expects unsigned long */
+	write_log ("Illegal kickmem lput at %08lx\n", (unsigned long) addr);
+}
+
+void REGPARAM2 kickmem_wput (uaecptr addr, uae_u32 b)
+{
+    /* In A1000 kickstart mode, writes at or above 0xfc0000 are stored in
+     * kickmemory; a write below that calls a1000_handle_kickstart (0).
+     * Otherwise the write is dropped and, if requested, logged. */
+    if (a1000_kickstart_mode) {
+	if (addr < 0xfc0000) {
+	    a1000_handle_kickstart (0);
+	    return;
+	}
+	addr = (addr - (kickmem_start & kickmem_mask)) & kickmem_mask;
+	do_put_mem_word ((uae_u16 *)(kickmemory + addr), b);
+    } else if (currprefs.illegal_mem)	/* cast: %lx expects unsigned long */
+	write_log ("Illegal kickmem wput at %08lx\n", (unsigned long) addr);
+}
+
+void REGPARAM2 kickmem_bput (uaecptr addr, uae_u32 b)
+{
+    /* Byte variant of the kickmemory write path (see the A1000 notes above). */
+    if (a1000_kickstart_mode) {
+	if (addr < 0xfc0000) {
+	    a1000_handle_kickstart (0);
+	    return;
+	}
+	addr = (addr - (kickmem_start & kickmem_mask)) & kickmem_mask;
+	kickmemory[addr] = b;
+    } else if (currprefs.illegal_mem)	/* message used to say "lput" */
+	write_log ("Illegal kickmem bput at %08lx\n", (unsigned long) addr);
+}
+
+int REGPARAM2 kickmem_check (uaecptr addr, uae_u32 size)
+{
+    /* Nonzero when the masked offset plus size does not exceed kickmem_size. */
+    uaecptr pos = (addr - (kickmem_start & kickmem_mask)) & kickmem_mask;
+    return (pos + size) <= kickmem_size;
+}
+
+uae_u8 REGPARAM2 *kickmem_xlate (uaecptr addr)
+{
+    /* Host pointer into kickmemory for the given Amiga address. */
+    uaecptr pos = (addr - (kickmem_start & kickmem_mask)) & kickmem_mask;
+    return kickmemory + pos;
+}
+
+/* CD32/CDTV extended kick memory */
+
+uae_u8 *extendedkickmemory;
+static int extendedkickmem_size;
+static uae_u32 extendedkickmem_start;
+
+#define EXTENDED_ROM_CD32 1
+#define EXTENDED_ROM_CDTV 2
+
+static int extromtype (void)
+{
+    /* The extended ROM flavour is inferred purely from its size. */
+    if (extendedkickmem_size == 524288)
+	return EXTENDED_ROM_CD32;
+    if (extendedkickmem_size == 262144)
+	return EXTENDED_ROM_CDTV;
+
+    return 0;
+}
+
+static uae_u32 extendedkickmem_lget (uaecptr) REGPARAM;
+static uae_u32 extendedkickmem_wget (uaecptr) REGPARAM;
+static uae_u32 extendedkickmem_bget (uaecptr) REGPARAM;
+static void extendedkickmem_lput (uaecptr, uae_u32) REGPARAM;
+static void extendedkickmem_wput (uaecptr, uae_u32) REGPARAM;
+static void extendedkickmem_bput (uaecptr, uae_u32) REGPARAM;
+static int extendedkickmem_check (uaecptr addr, uae_u32 size) REGPARAM;
+static uae_u8 *extendedkickmem_xlate (uaecptr addr) REGPARAM;
+
+uae_u32 REGPARAM2 extendedkickmem_lget (uaecptr addr)
+{
+    uae_u32 *cell;
+
+    /* Reduce the address to an offset inside the extended ROM buffer. */
+    addr = (addr - (extendedkickmem_start & extendedkickmem_mask)) & extendedkickmem_mask;
+    cell = (uae_u32 *)(extendedkickmemory + addr);
+    return do_get_mem_long (cell);
+}
+
+uae_u32 REGPARAM2 extendedkickmem_wget (uaecptr addr)
+{
+    uae_u16 *cell;
+
+    /* Reduce the address to an offset inside the extended ROM buffer. */
+    addr = (addr - (extendedkickmem_start & extendedkickmem_mask)) & extendedkickmem_mask;
+    cell = (uae_u16 *)(extendedkickmemory + addr);
+    return do_get_mem_word (cell);
+}
+
+uae_u32 REGPARAM2 extendedkickmem_bget (uaecptr addr)
+{
+    /* Byte read from the extended ROM buffer at the masked offset. */
+    addr = (addr - (extendedkickmem_start & extendedkickmem_mask)) & extendedkickmem_mask;
+    return extendedkickmemory[addr];
+}
+
+void REGPARAM2 extendedkickmem_lput (uaecptr addr, uae_u32 b)
+{
+    if (currprefs.illegal_mem)	/* nothing is stored; cast matches %lx */
+	write_log ("Illegal extendedkickmem lput at %08lx\n", (unsigned long) addr);
+}
+
+void REGPARAM2 extendedkickmem_wput (uaecptr addr, uae_u32 b)
+{
+    if (currprefs.illegal_mem)	/* nothing is stored; cast matches %lx */
+	write_log ("Illegal extendedkickmem wput at %08lx\n", (unsigned long) addr);
+}
+
+void REGPARAM2 extendedkickmem_bput (uaecptr addr, uae_u32 b)
+{
+    if (currprefs.illegal_mem)	/* nothing is stored; message used to say "lput" */
+	write_log ("Illegal extendedkickmem bput at %08lx\n", (unsigned long) addr);
+}
+
+int REGPARAM2 extendedkickmem_check (uaecptr addr, uae_u32 size)
+{
+    /* Nonzero when the masked offset plus size fits in extendedkickmem_size. */
+    uaecptr pos = (addr - (extendedkickmem_start & extendedkickmem_mask)) & extendedkickmem_mask;
+    return (pos + size) <= extendedkickmem_size;
+}
+
+uae_u8 REGPARAM2 *extendedkickmem_xlate (uaecptr addr)
+{
+    /* Host pointer into the extended ROM buffer for the given address. */
+    uaecptr pos = (addr - (extendedkickmem_start & extendedkickmem_mask)) & extendedkickmem_mask;
+    return extendedkickmemory + pos;
+}
+
+/* Default memory access functions */
+
+int REGPARAM2 default_check (uaecptr a, uae_u32 b)
+{
+    return 0;	/* fallback check: no address range is ever accepted */
+}
+
+uae_u8 REGPARAM2 *default_xlate (uaecptr a)
+{
+    write_log ("Your Amiga program just did something terribly stupid\n");
+    uae_reset ();
+    return kickmem_xlate (get_long (0xF80000));	/* Hand back a pointer into kickmemory so the caller doesn't crash. */
+}
+
+/* Address banks: long/word/byte getters, long/word/byte putters, xlate, check, and a last slot left NULL here */
+
+addrbank dummy_bank = {
+    dummy_lget, dummy_wget, dummy_bget,
+    dummy_lput, dummy_wput, dummy_bput,
+    default_xlate, dummy_check, NULL
+};
+
+addrbank mbres_bank = {
+    mbres_lget, mbres_wget, mbres_bget,
+    mbres_lput, mbres_wput, mbres_bput,
+    default_xlate, mbres_check, NULL
+};
+
+addrbank chipmem_bank = {
+    chipmem_lget, chipmem_wget, chipmem_bget,
+    chipmem_lput, chipmem_wput, chipmem_bput,
+    chipmem_xlate, chipmem_check, NULL
+};
+
+addrbank bogomem_bank = {
+    bogomem_lget, bogomem_wget, bogomem_bget,
+    bogomem_lput, bogomem_wput, bogomem_bput,
+    bogomem_xlate, bogomem_check, NULL
+};
+
+addrbank a3000mem_bank = {
+    a3000mem_lget, a3000mem_wget, a3000mem_bget,
+    a3000mem_lput, a3000mem_wput, a3000mem_bput,
+    a3000mem_xlate, a3000mem_check, NULL
+};
+
+addrbank kickmem_bank = {
+    kickmem_lget, kickmem_wget, kickmem_bget,
+    kickmem_lput, kickmem_wput, kickmem_bput,
+    kickmem_xlate, kickmem_check, NULL
+};
+
+addrbank extendedkickmem_bank = {
+    extendedkickmem_lget, extendedkickmem_wget, extendedkickmem_bget,
+    extendedkickmem_lput, extendedkickmem_wput, extendedkickmem_bput,
+    extendedkickmem_xlate, extendedkickmem_check, NULL
+};
+
+static int decode_cloanto_rom (uae_u8 *mem, int size, int real_size)
+{
+    FILE *keyf;
+    uae_u8 *p;
+    long cnt, t;
+    int keysize;
+    /* XORs mem in place with the key file currprefs.keyfile; returns 1 on success, 0 otherwise. */
+    if (strlen (currprefs.keyfile) == 0) {
+	write_log ("No filename given for ROM key file and ROM image is an encrypted \"Amiga Forever\" ROM file.\n");
+	return 0;
+    }
+    keyf = zfile_open (currprefs.keyfile, "rb");
+    if (keyf == 0) {
+	write_log ("Could not find specified ROM key-file.\n");
+	return 0;
+    }
+    p = (uae_u8 *) xmalloc (524288);
+    keysize = fread (p, 1, 524288, keyf);
+    zfile_close (keyf);		/* opened with zfile_open, so close it the way read_kickstart does */
+    /* An empty key file would make the "% keysize" below divide by zero. */
+    for (t = cnt = 0; keysize > 0 && cnt < size; cnt++, t = (t + 1) % keysize) {
+	mem[cnt] ^= p[t];
+	if (real_size == cnt + 1)
+	    t = keysize - 1;	/* restart the key at the end of the real image */
+    }
+    free (p);
+    return keysize > 0;
+}
+
+static int kickstart_checksum (uae_u8 *mem, int size)
+{
+    uae_u32 cksum = 0, prevck = 0;
+    int i;
+    /* End-around-carry sum of big-endian longs; bytes are widened first to avoid int overflow. */
+    for (i = 0; i < size; i += 4) {
+	uae_u32 data = ((uae_u32) mem[i] << 24) | ((uae_u32) mem[i + 1] << 16) | ((uae_u32) mem[i + 2] << 8) | mem[i + 3];
+	cksum += data;
+	if (cksum < prevck)
+	    cksum++;
+	prevck = cksum;
+    }
+    if (cksum != 0xFFFFFFFFul)
+	write_log ("Kickstart checksum incorrect. You probably have a corrupted ROM image.\n");
+    return 0;	/* a mismatch is only logged; the result is always 0 */
+}
+
+static int read_kickstart (FILE *f, uae_u8 *mem, int size, int dochecksum, int *cloanto_rom)
+{
+    /* Reads a ROM image from f into mem and always closes f.  Returns 0 on a
+     * short read, else 1; *cloanto_rom reports an "AMIROMTYPE1" header. */
+    unsigned char buffer[20];
+    int i, cr;
+
+    i = fread (buffer, 1, 11, f);
+    cr = (i == 11 && strncmp ((char *) buffer, "AMIROMTYPE1", 11) == 0);	/* never compare a short read */
+    if (!cr)
+	fseek (f, 0, SEEK_SET);
+    if (cloanto_rom)
+	*cloanto_rom = 0;
+
+    i = fread (mem, 1, size, f);
+    zfile_close (f);
+    if (i == 8192) {
+	/* 8K image: keep a private boot ROM copy, dropping any earlier one. */
+	free (a1000_bootrom);
+	a1000_bootrom = (uae_u8 *) xmalloc (8192);
+	memcpy (a1000_bootrom, mem, 8192);
+	a1000_handle_kickstart (1);
+    } else if (i == size / 2) {
+	memcpy (mem + size / 2, mem, i);	/* half-size image: mirror it */
+    } else if (i != size) {
+	write_log ("Error while reading Kickstart.\n");
+	return 0;
+    }
+    if (cr)
+	decode_cloanto_rom (mem, size, i);
+    if (dochecksum && i >= 262144)
+	kickstart_checksum (mem, size);
+    if (cloanto_rom)
+	*cloanto_rom = cr;
+    return 1;
+}
+
+static int load_extendedkickstart (void)
+{
+    /* Loads the optional CD32/CDTV extended ROM named by currprefs.romextfile.
+     * Returns 1 when an image was read successfully, 0 otherwise. */
+    FILE *f;
+    int size;
+
+    if (strlen (currprefs.romextfile) == 0)
+	return 0;
+    f = zfile_open (currprefs.romextfile, "rb");
+    if (!f) {
+	write_log ("No extended Kickstart ROM found\n");
+	return 0;
+    }
+
+    fseek (f, 0, SEEK_END);
+    size = ftell (f);
+    fseek (f, 0, SEEK_SET);
+    extendedkickmem_size = size > 300000 ? 524288 : 262144;
+
+    /* The mapping name follows the ROM type: CDTV "rom_f0", CD32 "rom_e0". */
+    extendedkickmemory = (uae_u8 *) mapped_malloc (extendedkickmem_size,
+	extromtype () == EXTENDED_ROM_CDTV ? "rom_f0" : "rom_e0");
+    extendedkickmem_bank.baseaddr = (uae_u8 *) extendedkickmemory;
+    if (!extendedkickmemory) {
+	zfile_close (f);
+	extendedkickmem_size = 0;
+	return 0;
+    }
+
+    /* Read at most the allocated size: the former fixed 524288 made a 256K
+     * image get mirrored past the end of its 256K buffer.  read_kickstart
+     * closes f itself, so there must be no second close here. */
+    return read_kickstart (f, extendedkickmemory, extendedkickmem_size, 0, 0);
+}
+
+
+static int load_kickstart (void)
+{
+    FILE *f = zfile_open (currprefs.romfile, "rb");
+    /* Loads currprefs.romfile into kickmemory; returns 0 when no ROM image could be loaded. */
+    if (f == NULL) {
+#if defined(AMIGA)||defined(__POS__)
+#define USE_UAE_ERSATZ "USE_UAE_ERSATZ"
+	if (!getenv (USE_UAE_ERSATZ)) {
+	    write_log ("Using current ROM. (create ENV:%s to " "use uae's ROM replacement)\n", USE_UAE_ERSATZ);
+	    memcpy (kickmemory, (char *) 0x1000000 - kickmem_size, kickmem_size);
+	    kickstart_checksum (kickmemory, kickmem_size);
+	    goto chk_sum;
+	}
+#endif
+	return 0;
+    }
+
+    if (!read_kickstart (f, kickmemory, kickmem_size, 1, &cloanto_rom))
+	return 0;
+    /* The label must be compiled wherever the goto above is (it was AMIGA-only). */
+#if defined(AMIGA)||defined(__POS__)
+  chk_sum:
+#endif
+
+    if (currprefs.kickshifter) {
+	/* Patch Kickstart ROM for ShapeShifter - from Christian Bauer.
+	   Changes 'lea $400,a0' to 'lea $2000,a0' for ShapeShifter compatibility.
+	   NOTE: lea is 0x41f8, so we should do this better with a search for
+	   0x41f80400 --> 0x41f82000
+	*/
+	if (kickmemory[0x24a] == 0x04 && kickmemory[0x24b] == 0x00) {	/* Kick 3.0 */
+	    kickmemory[0x24a] = 0x20;
+	    kickmemory[0x7ffea] -= 0x1c;
+	    write_log ("Kickstart KickShifted\n");
+	} else if (kickmemory[0x26e] == 0x04 && kickmemory[0x26f] == 0x00) {	/* Kick 3.1 */
+	    kickmemory[0x26e] = 0x20;
+	    kickmemory[0x7ffea] -= 0x1c;
+	    write_log ("Kickstart KickShifted\n");
+	} else if (kickmemory[0x24e] == 0x04 && kickmemory[0x24f] == 0x00) {	/* Kick 2.04 */
+	    kickmemory[0x24e] = 0x20;
+	    kickmemory[0x7ffea] -= 0x1c;
+	    write_log ("Kickstart KickShifted\n");
+	}
+    }
+    return 1;
+}
+
+char *address_space, *good_address_map;
+int good_address_fd;
+
+#ifndef NATMEM_OFFSET
+
+uae_u8 *mapped_malloc (size_t s, char *file)
+{
+    return malloc (s);	/* build without NATMEM_OFFSET: plain heap block, 'file' is unused */
+}
+
+void mapped_free (uae_u8 *p)
+{
+    free (p);	/* counterpart of the malloc-based mapped_malloc above */
+}
+#else
+
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+shmpiece *shm_start = NULL;
+int canbang = 1;
+
+static void dumplist (void)
+{
+    shmpiece *x;
+
+    /* Debug aid: print every entry of the shm_start list to stdout. */
+    printf ("Start Dump:\n");
+    for (x = shm_start; x; x = x->next)
+	printf ("  this=%p, Native %p, id %d, prev=%p, next=%p, size=0x%08x\n",
+		x, x->native_address, x->id, x->prev, x->next, x->size);
+    printf ("End Dump:\n");
+}
+
+static shmpiece *find_shmpiece (uae_u8 *base)
+{
+    shmpiece *x;
+
+    /* Linear search of the shm_start list for an exact native address. */
+    for (x = shm_start; x; x = x->next)
+	if (x->native_address == base)
+	    return x;
+    /* Not found: report it, dump the list and clear canbang. */
+    printf ("NATMEM: Failure to find mapping at %p\n", base);
+    dumplist ();
+    canbang = 0;
+    return 0;
+}
+
+static void delete_shmmaps (uae_u32 start, uae_u32 size)
+{
+    if (!canbang)
+	return;
+    /* Detach every attachment found in [start, start + size) and drop its list entry. */
+    while (size) {
+	uae_u8 *base = mem_banks[bankindex (start)]->baseaddr;
+	if (base) {
+	    shmpiece *x;
+	    base = ((uae_u8 *) NATMEM_OFFSET) + start;	/* same address add_shmmaps attaches at */
+
+	    x = find_shmpiece (base);
+	    if (!x)
+		return;
+
+	    if (x->size > size) {	/* piece is larger than what remains to delete: give up */
+		printf ("NATMEM: Failure to delete mapping at %08x(size %08x, delsize %08x)\n", start, x->size, size);
+		dumplist ();
+		canbang = 0;
+		return;
+	    }
+	    shmdt (x->native_address);
+	    size -= x->size;
+	    start += x->size;
+	    if (x->next)
+		x->next->prev = x->prev;	/* remove this one from the list */
+	    if (x->prev)
+		x->prev->next = x->next;
+	    else
+		shm_start = x->next;
+	    free (x);
+	} else {	/* bank has no base address: step over one 64K bank */
+	    size -= 0x10000;
+	    start += 0x10000;
+	}
+    }
+}
+
+static void add_shmmaps (uae_u32 start, addrbank *what)
+{
+    /* Attaches the segment backing bank 'what' once more, at
+     * NATMEM_OFFSET + start, and records the new attachment in the list. */
+    shmpiece *x;
+    shmpiece *y;
+    uae_u8 *base = what->baseaddr;
+
+    if (!canbang || !base)
+	return;
+    x = find_shmpiece (base);
+    if (!x)
+	return;
+    y = malloc (sizeof (shmpiece));
+    *y = *x;
+    base = ((uae_u8 *) NATMEM_OFFSET) + start;
+    y->native_address = shmat (y->id, base, 0);
+    if (y->native_address == (void *) -1) {
+	printf ("NATMEM: Failure to map existing at %08x(%p)\n", start, base);
+	perror ("shmat");
+	dumplist ();
+	canbang = 0;
+	free (y);	/* never linked into the list, so release it here */
+	return;
+    }
+    y->next = shm_start;
+    y->prev = NULL;
+    if (y->next)
+	y->next->prev = y;
+    shm_start = y;
+}
+
+uae_u8 *mapped_malloc (size_t s, char *file)
+{
+    /* Allocates s bytes as a shared-memory segment and records it in the
+     * shm_start list; any failure clears canbang and falls back to malloc. */
+    int id;
+    void *answer;
+    shmpiece *x;
+    if (!canbang)
+	return malloc (s);
+
+    /* NOTE(review): POSIX shmget takes three arguments; confirm where the 4-argument form comes from. */
+    id = shmget (IPC_PRIVATE, s, 0x1ff, file);
+    if (id == -1) {		/* shmget reports failure as -1, not 1 */
+	canbang = 0;
+	return malloc (s);
+    }
+    answer = shmat (id, 0, 0);
+    shmctl (id, IPC_RMID, NULL);	/* mark for removal; add_shmmaps still attaches by id */
+    if (answer == (void *) -1) {
+	canbang = 0;
+	return malloc (s);
+    }
+    x = malloc (sizeof (shmpiece));
+    x->native_address = answer;
+    x->id = id;
+    x->size = s;
+    x->next = shm_start;
+    x->prev = NULL;
+    if (x->next)
+	x->next->prev = x;
+    shm_start = x;
+    return answer;
+}
+
+void mapped_free (uae_u8 *base)
+{
+    shmpiece *x = find_shmpiece (base);	/* look up the record for this native address */
+    if (!x)
+	abort ();	/* NOTE(review): blocks from mapped_malloc's malloc fallback are not in the list */
+    shmdt (x->native_address);	/* detach only: the list entry is neither unlinked nor freed here */
+}
+
+#endif
+
+static void init_mem_banks (void)
+{
+    int i;	/* bank number; shifted as unsigned because i << 16 overflows int from 0x8000 on */
+    for (i = 0; i < 65536; i++)
+	put_mem_bank ((uae_u32) i << 16, &dummy_bank, 0);
+}
+
+static void allocate_memory (void)
+{
+    if (allocated_chipmem != currprefs.chipmem_size) {	/* reallocate only when the configured size changed */
+	if (chipmemory)
+	    mapped_free (chipmemory);
+	chipmemory = 0;
+
+	allocated_chipmem = currprefs.chipmem_size;
+	chipmem_mask = allocated_chipmem - 1;	/* a proper bit mask only for power-of-two sizes */
+
+	chipmemory = mapped_malloc (allocated_chipmem, "chip");
+	if (chipmemory == 0) {
+	    write_log ("Fatal error: out of memory for chipmem.\n");
+	    allocated_chipmem = 0;
+	} else
+	    do_put_mem_long ((uae_u32 *)(chipmemory + 4), 0);	/* zero the long at offset 4 */
+    }
+
+    if (allocated_bogomem != currprefs.bogomem_size) {
+	if (bogomemory)
+	    mapped_free (bogomemory);
+	bogomemory = 0;
+
+	allocated_bogomem = currprefs.bogomem_size;
+	bogomem_mask = allocated_bogomem - 1;
+
+	if (allocated_bogomem) {	/* a size of 0 means no slow RAM at all */
+	    bogomemory = mapped_malloc (allocated_bogomem, "bogo");
+	    if (bogomemory == 0) {
+		write_log ("Out of memory for bogomem.\n");
+		allocated_bogomem = 0;
+	    }
+	}
+    }
+    if (allocated_a3000mem != currprefs.a3000mem_size) {
+	if (a3000memory)
+	    mapped_free (a3000memory);
+	a3000memory = 0;
+
+	allocated_a3000mem = currprefs.a3000mem_size;
+	a3000mem_mask = allocated_a3000mem - 1;
+
+	if (allocated_a3000mem) {
+	    a3000memory = mapped_malloc (allocated_a3000mem, "a3000");
+	    if (a3000memory == 0) {
+		write_log ("Out of memory for a3000mem.\n");
+		allocated_a3000mem = 0;
+	    }
+	}
+    }
+
+    if (savestate_state == STATE_RESTORE) {	/* reload RAM from the positions stored by restore_cram/restore_bram */
+	fseek (savestate_file, chip_filepos, SEEK_SET);
+	fread (chipmemory, 1, allocated_chipmem, savestate_file);
+	if (allocated_bogomem > 0) {
+	    fseek (savestate_file, bogo_filepos, SEEK_SET);
+	    fread (bogomemory, 1, allocated_bogomem, savestate_file);
+	}
+    }
+    chipmem_bank.baseaddr = chipmemory;
+    bogomem_bank.baseaddr = bogomemory;	/* NOTE(review): a3000mem_bank.baseaddr is not set here -- confirm intended */
+}
+
+void memory_reset (void)
+{
+    int i, custom_start;
+
+#ifdef NATMEM_OFFSET
+    delete_shmmaps (0, 0xFFFF0000);
+#endif
+    init_mem_banks ();	/* every bank starts out as dummy_bank */
+
+    currprefs.chipmem_size = changed_prefs.chipmem_size;
+    currprefs.bogomem_size = changed_prefs.bogomem_size;
+    currprefs.a3000mem_size = changed_prefs.a3000mem_size;
+
+    allocate_memory ();
+
+    if (strcmp (currprefs.romfile, changed_prefs.romfile) != 0 || strcmp (currprefs.keyfile, changed_prefs.keyfile) != 0) {
+	ersatzkickfile = 0;
+	memcpy (currprefs.romfile, changed_prefs.romfile, sizeof currprefs.romfile);
+	memcpy (currprefs.keyfile, changed_prefs.keyfile, sizeof currprefs.keyfile);
+	/* Clear out whatever data remains across a reset.  */
+	memset (chipmemory, 0, allocated_chipmem);
+	if (!load_kickstart ()) {
+	    init_ersatz_rom (kickmemory);
+	    ersatzkickfile = 1;
+	}
+    }
+    /* Map chipmem over at least banks 0x00-0x1F (the lower 2MB); map_banks repeats the block to fill them */
+    i = allocated_chipmem > 0x200000 ? (allocated_chipmem >> 16) : 32;
+    map_banks (&chipmem_bank, 0x00, i, allocated_chipmem);
+
+    custom_start = 0xC0;
+
+    map_banks (&custom_bank, custom_start, 0xE0 - custom_start, 0);
+    map_banks (&cia_bank, 0xA0, 32, 0);
+    map_banks (&clock_bank, 0xDC, 1, 0);
+
+    /* @@@ Does anyone have a clue what should be in the 0x200000 - 0xA00000
+     * range on an Amiga without expansion memory?  */
+    custom_start = allocated_chipmem >> 16;	/* variable reused: first bank after chipmem */
+    if (custom_start < 0x20)
+	custom_start = 0x20;
+    map_banks (&dummy_bank, custom_start, 0xA0 - custom_start, 0);
+    /*map_banks (&mbres_bank, 0xDE, 1); */
+
+    if (bogomemory != 0) {
+	int t = allocated_bogomem >> 16;
+	if (t > 0x1C)
+	    t = 0x1C;
+	map_banks (&bogomem_bank, 0xC0, t, allocated_bogomem);	/* replaces the custom_bank mapping made at 0xC0 above */
+    }
+    if (a3000memory != 0)
+	map_banks (&a3000mem_bank, a3000mem_start >> 16, allocated_a3000mem >> 16, allocated_a3000mem);
+
+    map_banks (&rtarea_bank, RTAREA_BASE >> 16, 1, 0);
+
+    map_banks (&kickmem_bank, 0xF8, 8, 0);
+    if (a1000_bootrom)
+	a1000_handle_kickstart (1);
+    map_banks (&expamem_bank, 0xE8, 1, 0);
+
+    switch (extromtype ()) {
+    case EXTENDED_ROM_CDTV:
+	map_banks (&extendedkickmem_bank, 0xF0, 4, 0);
+	break;
+    case EXTENDED_ROM_CD32:
+	map_banks (&extendedkickmem_bank, 0xE0, 8, 0);
+	break;
+    default:	/* no extended ROM: a Cloanto ROM gets kickmem mapped at 0xE0 as well */
+	if (cloanto_rom)
+	    map_banks (&kickmem_bank, 0xE0, 8, 0);
+    }
+}
+
+void memory_init (void)
+{
+    allocated_chipmem = 0;	/* zero sizes make allocate_memory allocate any non-zero configured size */
+    allocated_bogomem = 0;
+    allocated_a3000mem = 0;
+    kickmemory = 0;
+    extendedkickmemory = 0;
+    chipmemory = 0;
+    a3000memory = 0;
+    bogomemory = 0;
+
+    kickmemory = mapped_malloc (kickmem_size, "kick");	/* NOTE(review): result is not checked for NULL */
+    kickmem_bank.baseaddr = kickmemory;
+
+    load_extendedkickstart ();
+    if (!load_kickstart ()) {	/* no usable ROM image: fall back to the ersatz ROM */
+	init_ersatz_rom (kickmemory);
+	ersatzkickfile = 1;
+    }
+
+    init_mem_banks ();
+    memory_reset ();
+
+    kickmem_mask = kickmem_size - 1;	/* the masks are only set here, after the banks were mapped */
+    extendedkickmem_mask = extendedkickmem_size - 1;
+}
+
+void memory_cleanup (void)
+{
+    /* Release every block owned by this module and clear the pointers. */
+    if (a3000memory)
+	mapped_free (a3000memory);
+    if (bogomemory)
+	mapped_free (bogomemory);
+    if (kickmemory)
+	mapped_free (kickmemory);
+    if (extendedkickmemory)	/* was leaked: allocated in load_extendedkickstart */
+	mapped_free (extendedkickmemory);
+    if (chipmemory)
+	mapped_free (chipmemory);
+    free (a1000_bootrom);	/* plain malloc block; free (NULL) is a no-op */
+
+    a3000memory = bogomemory = kickmemory = 0;
+    extendedkickmemory = chipmemory = a1000_bootrom = 0;
+    extendedkickmem_size = 0;	/* so extromtype () no longer reports an extended ROM */
+}
+
+void map_banks (addrbank *bank, int start, int size, int realsize)
+{
+    int bnr;
+    unsigned long int hioffs = 0, endhioffs = 0x100;
+    addrbank *orgbank = bank;	/* not used anywhere in this function */
+    uae_u32 realstart = start;
+    /* Installs 'bank' for 'size' 64K banks from bank number 'start'; realsize is the real block size in bytes. */
+    flush_icache (1);		/* Sure don't want to keep any old mappings around! */
+#ifdef NATMEM_OFFSET
+    delete_shmmaps (start << 16, size << 16);
+#endif
+
+    if (!realsize)	/* 0 means the block is as large as the mapped range */
+	realsize = size << 16;
+
+    if ((size << 16) < realsize) {
+	write_log ("Please report to bmeyer@cs.monash.edu.au, and mention:\n");
+	write_log ("Broken mapping, size=%x, realsize=%x\n", size, realsize);
+	write_log ("Start is %x\n", start);
+	write_log ("Reducing memory sizes, especially chipmem, may fix this problem\n");
+	abort ();
+    }
+
+    if (start >= 0x100) {	/* bank numbers from 0x100 up are mapped once, without the hioffs loop */
+	int real_left = 0;
+	for (bnr = start; bnr < start + size; bnr++) {
+	    if (!real_left) {	/* real block used up: start another copy at this bank */
+		realstart = bnr;
+		real_left = realsize >> 16;
+#ifdef NATMEM_OFFSET
+		add_shmmaps (realstart << 16, bank);
+#endif
+	    }
+	    put_mem_bank (bnr << 16, bank, realstart << 16);
+	    real_left--;
+	}
+	return;
+    }
+    if (currprefs.address_space_24)	/* 24-bit addressing: repeat the mapping every 0x100 banks */
+	endhioffs = 0x10000;
+    for (hioffs = 0; hioffs < endhioffs; hioffs += 0x100) {
+	int real_left = 0;
+	for (bnr = start; bnr < start + size; bnr++) {
+	    if (!real_left) {
+		realstart = bnr + hioffs;
+		real_left = realsize >> 16;
+#ifdef NATMEM_OFFSET
+		add_shmmaps (realstart << 16, bank);
+#endif
+	    }
+	    put_mem_bank ((bnr + hioffs) << 16, bank, realstart << 16);
+	    real_left--;
+	}
+    }
+}
+
+
+/* memory save/restore code */
+
+uae_u8 *save_cram (int *len)
+{
+    *len = allocated_chipmem;	/* chip RAM size is reported through len */
+    return chipmemory;	/* the live buffer itself, not a copy */
+}
+
+uae_u8 *save_bram (int *len)
+{
+    *len = allocated_bogomem;	/* slow RAM size is reported through len */
+    return bogomemory;	/* the live buffer itself, not a copy */
+}
+
+void restore_cram (int len, long filepos)
+{
+    chip_filepos = filepos;	/* allocate_memory seeks here to read the chip RAM contents */
+    changed_prefs.chipmem_size = len;	/* copied into currprefs by memory_reset */
+}
+
+void restore_bram (int len, long filepos)
+{
+    bogo_filepos = filepos;	/* allocate_memory seeks here to read the slow RAM contents */
+    changed_prefs.bogomem_size = len;	/* copied into currprefs by memory_reset */
+}
+
+uae_u8 *restore_rom (uae_u8 *src)
+{
+    int field;
+
+    /* Invoke restore_u32 once for each of the five 32-bit header fields
+     * that save_rom emits; the values themselves are not used here. */
+    for (field = 0; field < 5; field++)
+	restore_u32 ();
+    return src;
+}
+
+uae_u8 *save_rom (int first, int *len)
+{
+    static int count;	/* index of the next ROM to describe; reset when 'first' is set */
+    uae_u8 *dst, *dstbak;
+    uae_u8 *mem_real_start;	/* assigned below but not read in this function */
+    int mem_start, mem_size, mem_type, i, saverom;
+    /* Builds a malloc'ed description record for the next ROM; returns 0 when none are left. */
+    saverom = 0;	/* never changed below, so the ROM bytes themselves are not written */
+    if (first)
+	count = 0;
+    for (;;) {
+	mem_type = count;
+	switch (count) {
+	case 0:		/* Kickstart ROM */
+	    mem_start = 0xf80000;
+	    mem_real_start = kickmemory;
+	    mem_size = kickmem_size;
+	    /* 256KB or 512KB ROM? */
+	    for (i = 0; i < mem_size / 2 - 4; i++) {
+		if (longget (i + mem_start) != longget (i + mem_start + mem_size / 2))
+		    break;
+	    }
+	    if (i == mem_size / 2 - 4) {	/* both halves identical: describe only the upper half */
+		mem_size /= 2;
+		mem_start += 262144;
+	    }
+	    mem_type = 0;
+	    break;
+	default:
+	    return 0;
+	}
+	count++;
+	if (mem_size)
+	    break;
+    }
+    dstbak = dst = malloc (4 + 4 + 4 + 4 + 4 + mem_size);	/* five 32-bit fields plus room for the image */
+    save_u32 (mem_start);	/* presumably save_u32 writes through dst and advances it -- confirm */
+    save_u32 (mem_size);
+    save_u32 (mem_type);
+    save_u32 (longget (mem_start + 12));	/* version+revision */
+    save_u32 (0);
+    sprintf (dst, "Kickstart %d.%d", wordget (mem_start + 12), wordget (mem_start + 14));
+    dst += strlen (dst) + 1;	/* skip the name including its terminating NUL */
+    if (saverom) {
+	for (i = 0; i < mem_size; i++)
+	    *dst++ = byteget (mem_start + i);
+    }
+    *len = dst - dstbak;
+    return dstbak;
+}
diff -urN src-0.8.22/src/mmu.c src-0.8.22-mmu/src/mmu.c
--- src-0.8.22/src/mmu.c	1970-01-01 01:00:00.000000000 +0100
+++ src-0.8.22-mmu/src/mmu.c	2003-07-25 12:38:37.000000000 +0200
@@ -0,0 +1,733 @@
+#include "sysconfig.h"
+#include "sysdeps.h"
+
+#include "config.h"
+#include "options.h"
+#include "events.h"
+#include "uae.h"
+#include "memory.h"
+#include "custom.h"
+#include "newcpu.h"
+#include "autoconf.h"
+#include "ersatz.h"
+#include "debug.h"
+#include "compiler.h"
+#include "gui.h"
+#include "savestate.h"
+
+#define DBG_MMU_VERBOSE	1
+#define DBG_MMU_SANITY	0
+
+static void mmu_dump_ttr(const char * label, uae_u32 ttr)
+{
+	uae_u32 from_addr, to_addr;	/* base field and mask field, both aligned to the top byte */
+	/* Print one transparent translation register, decoded, to stdout. */
+	from_addr = ttr & MMU_TTR_LOGICAL_BASE;
+	to_addr = (ttr & MMU_TTR_LOGICAL_MASK) << 8;
+	/* uae_u32 is not necessarily 'unsigned long': cast so the arguments match the conversions */
+	printf("%s: [%08lx] %08lx - %08lx enabled=%d supervisor=%d wp=%d cm=%02d\n",
+			label, (unsigned long)ttr,
+			(unsigned long)from_addr, (unsigned long)to_addr,
+			ttr & MMU_TTR_BIT_ENABLED ? 1 : 0,
+			(int)((ttr & (MMU_TTR_BIT_SFIELD_ENABLED | MMU_TTR_BIT_SFIELD_SUPER)) >> MMU_TTR_SFIELD_SHIFT),
+			ttr & MMU_TTR_BIT_WRITE_PROTECT ? 1 : 0,
+			(int)((ttr & MMU_TTR_CACHE_MASK) >> MMU_TTR_CACHE_SHIFT)
+		  );
+}
+
+extern void mmu_make_transparent_region(uaecptr baseaddr, uae_u32 size, int datamode)
+{
+	uae_u32 * ttr, span;	/* span: top-byte address bits that vary inside the region */
+	uae_u32 * ttr0 = datamode ? &regs.dtt0 : &regs.itt0;
+	uae_u32 * ttr1 = datamode ? &regs.dtt1 : &regs.itt1;
+	/* Program a free TTR (ttr1 preferred) to pass the region through untranslated; no-op if both are in use. */
+	if ((*ttr1 & MMU_TTR_BIT_ENABLED) == 0)
+		ttr = ttr1;
+	else if ((*ttr0 & MMU_TTR_BIT_ENABLED) == 0)
+		ttr = ttr0;
+	else
+		return;
+	span = (baseaddr ^ (baseaddr + (size ? size - 1 : 0))) >> 24;	/* first vs. last address; size 0 must not wrap */
+	span |= span >> 1; span |= span >> 2; span |= span >> 4;	/* everything below the highest differing bit is "don't care" */
+	*ttr = baseaddr & MMU_TTR_LOGICAL_BASE & ~(span << 24);	/* keep the base clear under the mask */
+	*ttr |= (span << 16) & MMU_TTR_LOGICAL_MASK;	/* the mask field holds ignored bits, not an end address */
+	*ttr |= MMU_TTR_BIT_ENABLED;
+	write_log("MMU: map transparent mapping of %08x\n", *ttr);
+}
+
+/* check if an address matches a ttr */
+STATIC_INLINE int mmu_match_ttr(uae_u32 ttr, uaecptr addr, int write, int test)
+{
+	if (ttr & MMU_TTR_BIT_ENABLED)	{	/* TTR enabled */
+		uae_u8 msb, match, mask;
+		/* only the top address byte takes part; bits set in 'mask' are ignored */
+		msb = (addr & MMU_TTR_LOGICAL_BASE) >> 24;
+		match = (ttr & MMU_TTR_LOGICAL_BASE) >> 24;
+		mask = (ttr & MMU_TTR_LOGICAL_MASK) >> 16;
+		/* masked bits must be ignored in the base as well, hence XOR-then-mask */
+		if (((msb ^ match) & ~mask) == 0) {
+			/* unless the S-field says "ignore", the privilege level must agree with it */
+			if ((ttr & MMU_TTR_BIT_SFIELD_ENABLED) == 0)	{
+				if ((ttr & MMU_TTR_BIT_SFIELD_SUPER) && !regs.s)	{
+					return TTR_NO_MATCH;
+				}
+				if ((ttr & MMU_TTR_BIT_SFIELD_SUPER) == 0 && regs.s)	{
+					return TTR_NO_MATCH;
+				}
+			}
+			/* 'test' callers get the hit reflected in regs.mmusr */
+			if (test)	{
+				regs.mmusr = MMU_MMUSR_T | MMU_MMUSR_R;
+			}
+			/* returns TTR_NO_WRITE for a write to a protected match, TTR_OK_MATCH otherwise */
+			if ((ttr & MMU_TTR_BIT_WRITE_PROTECT) && write)
+				return TTR_NO_WRITE;
+			return TTR_OK_MATCH;
+		}
+	}
+	return TTR_NO_MATCH;
+}
+
+struct mmu_atc_line atc[64];	/* address translation cache: filled by mmu_translate(), invalidated by mmu_op() */
+static int atc_rand = 0;	/* bumped on every mmu_translate() call; picks the victim when no entry is free */
+static int atc_last_hit = -1;	/* index of the last hit reported by the verbose log; -1 = none */
+
+/* {{{ mmu_dump_table */
+static void mmu_dump_table(const char * label, uaecptr root_ptr)	/* debug aid: print every valid descriptor reachable from root_ptr */
+{
+	const int ROOT_TABLE_SIZE = 128,
+		PTR_TABLE_SIZE = 128,
+		PAGE_TABLE_SIZE = 64,	/* 4KB pages; with 8KB pages only the first half exists */
+		ROOT_INDEX_SHIFT = 25,
+		PTR_INDEX_SHIFT = 18,
+		PAGE_INDEX_SHIFT = regs.mmu_pagesize ? 13 : 12;
+	int root_idx, ptr_idx, page_idx;
+	uae_u32 root_des, ptr_des, page_des;
+	uaecptr ptr_des_addr, page_addr,
+		root_log, ptr_log, page_log;
+	const int n_page_entries = regs.mmu_pagesize ? PAGE_TABLE_SIZE / 2 : PAGE_TABLE_SIZE;
+	printf("%s: root=%lx\n", label, (unsigned long)root_ptr);
+	/* descriptors are 32 bits wide, so entry N of a table lives at table + 4 * N */
+	for (root_idx = 0; root_idx < ROOT_TABLE_SIZE; root_idx++)	{
+		root_des = phys_get_long((root_ptr & MMU_ROOT_PTR_ADDR_MASK) + (root_idx << 2));
+
+		if ((root_des & 2) == 0)
+			continue;	/* invalid */
+
+		printf("ROOT: %03d U=%d W=%d UDT=%02d\n", root_idx,
+				root_des & 8 ? 1 : 0,
+				root_des & 4 ? 1 : 0,
+				(int)(root_des & 3)
+			  );
+		/* unsigned shift: index 127 << 25 does not fit a signed int */
+		root_log = (uaecptr)root_idx << ROOT_INDEX_SHIFT;
+
+		ptr_des_addr = root_des & MMU_ROOT_PTR_ADDR_MASK;
+
+		for (ptr_idx = 0; ptr_idx < PTR_TABLE_SIZE; ptr_idx++)	{
+			struct {
+				uaecptr	log, phys;
+				int start_idx, n_pages;	/* number of pages covered by this entry */
+				uae_u32 match;
+			} page_info[PAGE_TABLE_SIZE];
+			int n_pages_used;
+
+			ptr_des = phys_get_long(ptr_des_addr + (ptr_idx << 2));
+			ptr_log = root_log | ((uaecptr)ptr_idx << PTR_INDEX_SHIFT);
+
+			if ((ptr_des & 2) == 0)
+				continue; /* invalid */
+
+			page_addr = ptr_des & (regs.mmu_pagesize ? MMU_PTR_PAGE_ADDR_MASK_8 : MMU_PTR_PAGE_ADDR_MASK_4);
+			/* collect runs of identical descriptors so they print as one line */
+			n_pages_used = -1;
+			for (page_idx = 0; page_idx < n_page_entries; page_idx++)	{
+
+				page_des = phys_get_long(page_addr + (page_idx << 2));
+				page_log = ptr_log | ((uaecptr)page_idx << PAGE_INDEX_SHIFT);
+
+				switch (page_des & 3)	{
+					case 0: /* invalid */
+						continue;
+					case 1: case 3: /* resident */
+					case 2: /* indirect */
+						if (n_pages_used == -1 || page_info[n_pages_used].match != page_des)	{
+							/* use the next entry */
+							n_pages_used++;
+
+							page_info[n_pages_used].match = page_des;
+							page_info[n_pages_used].n_pages = 1;
+							page_info[n_pages_used].start_idx = page_idx;
+							page_info[n_pages_used].log = page_log;
+						}
+						else	{
+							page_info[n_pages_used].n_pages++;
+						}
+						break;
+				}
+			}
+
+			if (n_pages_used == -1)
+				continue;
+
+			printf(" PTR: %03d U=%d W=%d UDT=%02d\n", ptr_idx,
+				ptr_des & 8 ? 1 : 0,
+				ptr_des & 4 ? 1 : 0,
+				(int)(ptr_des & 3)
+			  );
+
+
+			for (page_idx = 0; page_idx <= n_pages_used; page_idx++)	{
+				page_des = page_info[page_idx].match;
+
+				if ((page_des & MMU_PDT_MASK) == 2)	{
+					printf("  PAGE: %03d-%03d log=%08lx INDIRECT --> addr=%08lx\n",
+							page_info[page_idx].start_idx,
+							page_info[page_idx].start_idx + page_info[page_idx].n_pages - 1,
+							(unsigned long)page_info[page_idx].log,
+							(unsigned long)(page_des & MMU_PAGE_INDIRECT_MASK)
+						  );
+
+				}
+				else	{
+					printf("  PAGE: %03d-%03d log=%08lx addr=%08lx UR=%02d G=%d U1/0=%d S=%d CM=%d M=%d U=%d W=%d\n",
+							page_info[page_idx].start_idx,
+							page_info[page_idx].start_idx + page_info[page_idx].n_pages - 1,
+							(unsigned long)page_info[page_idx].log,
+							(unsigned long)(page_des & (regs.mmu_pagesize ? MMU_PAGE_ADDR_MASK_8 : MMU_PAGE_ADDR_MASK_4)),
+							(int)((page_des & (regs.mmu_pagesize ? MMU_PAGE_UR_MASK_8 : MMU_PAGE_UR_MASK_4)) >> MMU_PAGE_UR_SHIFT),
+							page_des & MMU_DES_GLOBAL ? 1 : 0,
+							(int)((page_des & MMU_TTR_UX_MASK) >> MMU_TTR_UX_SHIFT),
+							page_des & MMU_DES_SUPER ? 1 : 0,
+							(int)((page_des & MMU_TTR_CACHE_MASK) >> MMU_TTR_CACHE_SHIFT),
+							page_des & MMU_DES_MODIFIED ? 1 : 0,
+							page_des & MMU_DES_USED ? 1 : 0,
+							page_des & MMU_DES_WP ? 1 : 0
+						  );
+				}
+			}
+		}
+
+	}
+}
+/* }}} */
+
+/* {{{ mmu_dump_atc */
+void mmu_dump_atc(void)	/* print every valid ATC entry to stdout, one line each */
+{
+	int i;
+	for (i = 0; i < 64; i++)	{
+		if (!atc[i].v)
+			continue;	/* invalid entries are not shown */
+		printf("ATC[%02d] G=%d S=%d CM=%d M=%d W=%d R=%d FC2=%d log=%08x --> phys=%08x\n",
+				i, atc[i].g ? 1 : 0, atc[i].s, atc[i].cm, atc[i].m, atc[i].w, atc[i].r,	/* g is normalised to 0/1; s, cm, m, w, r print raw */
+				atc[i].fc2 ? 1 : 0,	/* fc2 is stored as (fc & 4), so normalise it too */
+				atc[i].log,
+				atc[i].phys
+				);
+	}
+}
+/* }}} */
+
+/* {{{ mmu_dump_tables */
+void mmu_dump_tables(void)	/* print the MMU registers, the four TTRs and the ATC to stdout */
+{
+	if (currprefs.cpu_level != 4)	{	/* only cpu_level 4 is dumped; NOTE(review): Exception() treats level 5 as a 68060 - confirm whether it belongs here too */
+		printf("This CPU has no MMU hardware\n");
+		return;
+	}
+	printf("URP: %08x   SRP: %08x  MMUSR: %x  TC: %x\n", regs.urp, regs.srp, regs.mmusr, regs.tc);
+	mmu_dump_ttr("DTT0", regs.dtt0);	
+	mmu_dump_ttr("DTT1", regs.dtt1);	
+	mmu_dump_ttr("ITT0", regs.itt0);	
+	mmu_dump_ttr("ITT1", regs.itt1);	
+	mmu_dump_atc();
+	//mmu_dump_table("SRP", regs.srp);	/* the full table walk is currently disabled */
+}
+/* }}} */
+
+static void phys_dump_mem (uaecptr addr, int lines)	/* hex-dump 'lines' rows of 16 words read via phys_get_word() */
+{
+	while (lines-- > 0) {	/* a negative count prints nothing instead of looping until wrap-around */
+		int i;
+		printf ("%08lx ", (unsigned long)addr);	/* uaecptr is not necessarily 'unsigned long' */
+		for (i = 0; i < 16; i++) {
+			printf ("%04x ", (unsigned)phys_get_word(addr)); addr += 2;
+		}
+		printf ("\n");
+	}
+}
+
+
+uaecptr REGPARAM2 mmu_translate(uaecptr theaddr, int fc, int write, uaecptr pc, int size, int test)
+{
+	uae_u32 
+		atc_hit_addr = 0,
+		root_ptr,
+		root_des, root_des_addr,
+		ptr_des = 0, ptr_des_addr = 0,
+		page_des = 0, page_des_addr = 0,
+		phys_addr = 0,
+		fslw = 0;
+	uae_u8	ri, pi, pgi, wp = 0;
+	uae_u16	ssw = 0;
+	uae_u32 page_frame;
+	int supervisor, datamode =0;
+	int i, atc_sel, atc_index = -1, n_table_searches = 0;
+	/* Translate logical address 'theaddr' for function code 'fc' and return the physical address.
+	 * On a fault the details go to regs.mmu_fault_addr/mmu_fslw/mmu_ssw and exception 2 is raised via
+	 * longjmp, unless 'test' carries MMU_TEST_NO_BUSERR (then 0 is returned); 'test' also updates regs.mmusr. */
+	supervisor = fc & 4;
+	
+	switch(fc)	{
+		case 0: /* data cache push */
+		case 1:
+		case 3:
+		case 5:
+			datamode = 1;
+			break;
+		case 2:
+		case 4:
+		case 6:
+			datamode = 0;
+			break;
+		case 7:
+		default:
+			write_log("FC=%d should not happen\n", fc);	/* report the offending function code itself */
+			abort();
+	}
+	
+	root_ptr = supervisor ? regs.srp : regs.urp;
+	
+	/* check ttr0 */
+
+	/* TTR operate independently from the enable bit, so we can just ignore it if the MMU
+	 * is not enabled to get better performance.
+	 * But AmigaOS depends on PTEST to operate when the MMU is disabled;
+	 * it uses the result in the ssw to detect a working MMU and then enables the MMU */
+	if (regs.mmu_enabled || test)	{
+		switch(mmu_match_ttr(datamode ? regs.dtt0 : regs.itt0, theaddr, write, test))	{
+			case TTR_NO_WRITE:
+				write_log("MMU: write protected (via ttr) %lx\n", theaddr);
+				goto bus_err;
+			case TTR_OK_MATCH:
+				return theaddr;
+		}
+		/* check ttr1 */
+		switch(mmu_match_ttr(datamode ? regs.dtt1 : regs.itt1, theaddr, write, test))	{
+			case TTR_NO_WRITE:
+				write_log("MMU: write protected (via ttr) %lx\n", theaddr);
+				goto bus_err;
+			case TTR_OK_MATCH:
+				return theaddr;
+		}
+	}
+
+	if (!regs.mmu_enabled)
+		return theaddr;
+
+	/* split the address: 7-bit root index, 7-bit pointer index, 5/6-bit page index, 4-bit ATC set */
+	ri = (theaddr & 0xfe000000) >> 25;
+	pi = (theaddr & 0x01fc0000) >> 18;
+	if (regs.mmu_pagesize == MMU_PAGE_8KB)	{
+		pgi = (theaddr & 0x3e000) >> 13;
+		page_frame = theaddr & 0xffffe000;
+		atc_sel = ((theaddr & 0x1e000) >> 13) & 0xf;
+	}
+	else	{
+		pgi = (theaddr & 0x3f000) >> 12;
+		page_frame = theaddr & 0xfffff000;
+		atc_sel = ((theaddr & 0xf000) >> 12) & 0xf;
+	}
+check_atc:
+	
+	atc_rand++;	/* for random replacement */
+	
+	if (test & MMU_TEST_FORCE_TABLE_SEARCH)
+		goto table_search;
+	
+	for (i = 0; i < 4; i++)	{
+		atc_index = atc_sel + (16 * i);	/* 16 sets x 4 ways: stride 16 keeps the sets disjoint and uses all 64 entries */
+
+#if DBG_MMU_VERBOSE
+	if (test & MMU_TEST_VERBOSE)
+		write_log("MMU: %lx checking atc %d\nv=%d log=%lx s=%d PHYS=%lx (frame=%lx s=%d)\n",
+				theaddr, atc_index,
+				atc[atc_index].v, atc[atc_index].log, atc[atc_index].s,
+				atc[atc_index].phys,
+				page_frame, regs.s);
+#endif
+
+
+		if (atc[atc_index].v && (atc[atc_index].log == page_frame) && atc[atc_index].fc2 == (fc & 4))
+			break;
+		atc_index = -1;
+	}
+
+	if (atc_index != -1)	{
+atc_matched:
+
+		/* it's a hit! */
+
+		if (!atc[atc_index].r)	{
+#if DBG_MMU_VERBOSE
+			write_log("MMU: non-resident page!\n");
+#endif
+			goto bus_err;
+		}
+
+
+		wp = atc[atc_index].w;
+
+		atc_hit_addr = atc[atc_index].phys | ((regs.mmu_pagesize == MMU_PAGE_8KB) 
+				? (theaddr & 0x1fff)
+				: (theaddr & 0x0fff));
+
+		if (test)	{
+			if (atc[atc_index].g)
+				regs.mmusr |= MMU_MMUSR_G;
+			if (atc[atc_index].s)
+				regs.mmusr |= MMU_MMUSR_S;
+			if (atc[atc_index].m)
+				regs.mmusr |= MMU_MMUSR_M;
+			if (atc[atc_index].w)
+				regs.mmusr |= MMU_MMUSR_W;
+			if (atc[atc_index].r)
+				regs.mmusr |= MMU_MMUSR_R;
+
+			regs.mmusr |= atc[atc_index].phys & MMU_MMUSR_ADDR_MASK;
+		}
+
+#if DBG_MMU_VERBOSE
+		if (test & MMU_TEST_VERBOSE)
+			if (atc_last_hit != atc_index)	{
+				atc_last_hit = atc_index;
+				write_log("MMU: ATC %d HIT! %lx --> %lx\n", atc_index, theaddr, atc_hit_addr);
+				write_log("MMU: ATC v=%d log=%lx s=%d PHYS=%lx (frame=%lx s=%d)\n",
+						atc[atc_index].v, atc[atc_index].log, atc[atc_index].s,
+						atc[atc_index].phys,
+						page_frame, regs.s);
+
+			}
+#endif
+
+		if (atc[atc_index].s && !supervisor)	{
+			write_log("MMU: Supervisor only\n");
+			fslw |= (1 << 8);
+			goto bus_err;
+		}
+		if (wp && write)	{
+			write_log("MMU: write protected!\n");
+			fslw |= (1 << 7);
+			goto bus_err;
+		}
+
+		if (!atc[atc_index].m && write)	{
+			/* we need to update the M bit of the final descriptor */
+			goto table_search;
+		}
+#if 0
+		goto table_search;
+#endif
+		return atc_hit_addr;
+	}
+	atc_index = -1;
+	
+table_search:
+
+	if (n_table_searches++ > 3)	{
+		write_log("MMU: apparently looping during table search.\n");
+		abort();
+	}
+	
+	if (atc_index == -1)	{
+		//write_log("MMU: replace atc: ");
+		for (i = 0; i < 4; i++)	{
+			if (!atc[atc_sel + (16 * i)].v)	{
+				atc_index = atc_sel + (16 * i);
+				break;
+			}
+		}
+		/* random choice */
+		if (atc_index == -1)	{
+			atc_index = atc_sel + (16 * (atc_rand & 3));	/* & 3 so that all four ways can be evicted */
+		}
+	}
+
+	fslw |= (1 << 6); /* TWE: flag as being in table search */
+
+#if DBG_MMU_VERBOSE
+	if (test & MMU_TEST_VERBOSE)
+	write_log("MMU: table search for logical=%08x ri=%02x pi=%02x pgi=%03x page_frame=%08x root_ptr=%08x\n",
+			theaddr, ri, pi, pgi, page_frame, root_ptr);
+#endif
+
+	/* root descriptor */
+	root_des_addr = (root_ptr & MMU_ROOT_PTR_ADDR_MASK) | (ri << 2);
+
+#if DBG_MMU_SANITY
+	if (!phys_valid_address(root_des_addr, sz_long))
+		goto bus_err;
+#endif
+	
+	root_des = phys_get_long(root_des_addr);
+
+#if DBG_MMU_VERBOSE
+	if (test & MMU_TEST_VERBOSE)	{
+	write_log("MMU: root_des_addr = %lx  val=%08x\n", root_des_addr, root_des);
+	//phys_dump_mem(root_ptr, 128 / 16);
+	}
+#endif
+	
+	switch(root_des & MMU_UDT_MASK)	{
+		case 0x0:
+		case 0x1:
+			write_log("MMU: invalid root descriptor for %lx\n", theaddr);
+			fslw |= (1 << 12); /* PTA */
+			goto make_non_resident_atc;
+	}
+	
+	wp |= root_des & MMU_DES_WP;
+	/* touch the page */
+	if (!wp && (root_des & MMU_DES_USED) == 0)	{
+		root_des |= MMU_DES_USED;
+		phys_put_long(root_des_addr, root_des);
+	}
+
+	
+	ptr_des_addr = (root_des & MMU_ROOT_PTR_ADDR_MASK) | (pi << 2);
+#if DBG_MMU_SANITY
+	if (!phys_valid_address(ptr_des_addr, sz_long))
+		goto bus_err;
+#endif
+	
+	ptr_des = phys_get_long(ptr_des_addr);
+#if DBG_MMU_VERBOSE
+	if (test & MMU_TEST_VERBOSE)	
+	write_log("MMU: ptr_des_addr = %lx  val=%08x\n", ptr_des_addr, ptr_des);
+	//phys_dump_mem(ptr_des_addr, 128 / 16);
+#endif
+	
+	switch(ptr_des & MMU_UDT_MASK)	{
+		case 0x0:
+		case 0x1:
+			write_log("MMU: invalid ptr descriptor for %lx\n", theaddr);
+			fslw |= (1 << 11); /* PTB */
+			goto make_non_resident_atc;
+	}
+	wp |= ptr_des & MMU_DES_WP;
+	/* touch */
+	if (!wp && (ptr_des & MMU_DES_USED) == 0)	{
+		ptr_des |= MMU_DES_USED;
+		phys_put_long(ptr_des_addr, ptr_des);
+	}
+
+	if (regs.mmu_pagesize == MMU_PAGE_8KB)
+		page_des_addr = (ptr_des & MMU_PTR_PAGE_ADDR_MASK_8) | (pgi << 2);
+	else
+		page_des_addr = (ptr_des & MMU_PTR_PAGE_ADDR_MASK_4) | (pgi << 2);
+	
+get_page_descriptor:
+#if DBG_MMU_SANITY
+	if (!phys_valid_address(page_des_addr, sz_long))
+		goto bus_err;
+#endif
+	
+	page_des = phys_get_long(page_des_addr);
+#if DBG_MMU_VERBOSE
+	if (test & MMU_TEST_VERBOSE)	{
+		write_log("MMU: page_des_addr = %lx  val=%08x\n", page_des_addr, page_des);
+		phys_dump_mem(page_des_addr, 64 / 16);
+	}
+#endif
+
+	switch(page_des & MMU_PDT_MASK)	{
+		case 0x0:
+			write_log("MMU: invalid page descriptor log=%08lx page_des=%08lx @%08lx\n", theaddr, page_des, page_des_addr);
+			fslw |= (1 << 9); /* PF */
+			goto make_non_resident_atc;
+		case 0x1:
+		case 0x3:
+			/* resident page */
+			break;
+		case 0x2:
+		default:
+			/* indirect */
+			if (fslw & (1 << 10))	{
+				write_log("MMU: double indirect descriptor log=%lx descriptor @ %lx\n", theaddr, page_des_addr);
+				goto make_non_resident_atc;
+			}
+			page_des_addr = page_des & MMU_PAGE_INDIRECT_MASK;
+			fslw |= (1 << 10); /* IL - in case a fault occurs later, tag it as indirect */
+			goto get_page_descriptor;
+	}
+
+	wp |= page_des & MMU_DES_WP;
+	if (!wp)	{
+		int modify = 0;
+		if ((page_des & MMU_DES_USED) == 0)	{
+			page_des |= MMU_DES_USED;
+			modify = 1;
+		}
+		/* set the modified bit */
+		if (write && (page_des & MMU_DES_MODIFIED) == 0)	{
+			page_des |= MMU_DES_MODIFIED;
+			modify = 1;
+		}
+		if (modify)
+			phys_put_long(page_des_addr, page_des);
+	}
+	
+	atc[atc_index].log = page_frame;
+	atc[atc_index].v = 1;
+	atc[atc_index].r = 1;
+	atc[atc_index].s = page_des & MMU_DES_SUPER;	/* supervisor */
+	atc[atc_index].w = wp;
+	atc[atc_index].fc2 = fc & 4;
+	atc[atc_index].g = page_des & MMU_DES_GLOBAL;
+	atc[atc_index].phys = page_des & (regs.mmu_pagesize ? MMU_PAGE_ADDR_MASK_8 : MMU_PAGE_ADDR_MASK_4);
+
+	atc[atc_index].m = (page_des & MMU_DES_MODIFIED) ? 1 : 0;
+
+	
+#if 0
+	if (atc_hit_addr != 0 && atc_hit_addr != phys_addr)	{
+		write_log("MMU: ERROR! ATC hit does not match table search! for %lx --> %lx (atc gave %lx)\n",
+				theaddr, phys_addr, atc_hit_addr);
+		activate_debugger();
+	}
+#endif
+	/* re-use the end of the atc code */
+	goto atc_matched;
+	
+bus_err:
+
+	ssw |= (1 << 10);	/* ATC */
+	if (!write)
+		ssw |= (1 << 8);
+
+	fslw |= (1 << (write ? 23 : 24));
+	if (!datamode)	{
+		fslw |= (1 << 15); /* IO */
+
+		if (supervisor)
+			ssw |= 0x6;
+		else
+			ssw |= 0x2;
+	}
+#if 0
+	if (regs.t0)
+		fslw |= (1 << 19);
+	if (regs.t1)
+		fslw |= (1 << 20);
+#endif
+
+	ssw |= fc & 7; /* Copy TM */
+	
+	regs.mmu_fault_addr = theaddr;
+	regs.mmu_fslw = fslw;
+	regs.mmu_ssw = ssw;
+
+	if (test)
+		regs.mmusr |= MMU_MMUSR_B;
+	
+	write_log("BUS ERROR: fc=%d w=%d log=%08x ssw=%04x fslw=%08x\n", fc, write, theaddr, ssw, fslw);
+
+	if ((test & MMU_TEST_NO_BUSERR) == 0)	{
+		Exception(2, pc);
+		longjmp(m68k_exception, 0);
+	}
+	return 0;
+
+make_non_resident_atc:
+#if DBG_MMU_VERBOSE
+	write_log("MMU: table search for logical=%08x FC=%d ri=%02x pi=%02x pgi=%03x page_frame=%08x root_ptr=%08x\n",
+			theaddr, fc, ri, pi, pgi, page_frame, root_ptr);
+	write_log("MMU: root_des_addr = %lx  val=%08x\n", root_des_addr, root_des);
+	write_log("MMU: ptr_des_addr = %lx  val=%08x\n", ptr_des_addr, ptr_des);
+	write_log("MMU: page_des_addr = %lx  val=%08x\n", page_des_addr, page_des);
+	mmu_dump_ttr("DTT0", regs.dtt0);	
+	mmu_dump_ttr("DTT1", regs.dtt1);	
+	mmu_dump_ttr("ITT0", regs.itt0);	
+	mmu_dump_ttr("ITT1", regs.itt1);	
+#endif
+
+	atc[atc_index].log = page_frame;
+	atc[atc_index].phys = 0;
+	atc[atc_index].v = 0;
+	atc[atc_index].r = 0;
+	goto bus_err;
+}
+
+void mmu_op(uae_u32 opcode, uae_u16 extra)	/* emulate PFLUSH* and PTEST*; any other opcode is an illegal instruction */
+{
+	if ((opcode & 0xFE0) == 0x0500) {
+		int i, regno, didflush = 0;
+		uaecptr frame;	/* PFLUSH: page frame of the address held in An */
+		mmu_set_mmusr(0);
+		/* ATC tags hold page frames (see mmu_translate), so An is reduced to its frame before comparing */
+		regno = opcode & 7;
+		frame = m68k_areg(regs, regno) & (regs.mmu_pagesize == MMU_PAGE_8KB ? 0xffffe000 : 0xfffff000);
+		switch((opcode & 24) >> 3)	{
+			case 0:
+				/* PFLUSHN (An) flush page entry if not global */
+				write_log ("PFLUSHN (A%d) %08x DFC=%d\n", regno, m68k_areg(regs, regno), regs.dfc);
+				for (i = 0; i < 64; i++)	{
+					if (atc[i].v && !atc[i].g && (atc[i].log == frame)
+							&& (regs.dfc & 4) == atc[i].fc2)
+					{
+						atc[i].v = 0;
+						didflush++;
+					}
+				}
+				break;
+			case 1:
+				/* PFLUSH (An) flush page entry */
+				write_log ("PFLUSH (A%d) %08x DFC=%d\n", regno, m68k_areg(regs, regno), regs.dfc);
+				for (i = 0; i < 64; i++)	{
+					if (atc[i].v && (atc[i].log == frame)
+							&& (regs.dfc & 4) == atc[i].fc2)
+					{
+						atc[i].v = 0;
+						didflush++;
+					}
+				}
+
+				break;
+
+			case 2:
+				/* PFLUSHAN flush all except global; no address operand takes part */
+				write_log ("PFLUSHAN\n");
+				for (i = 0; i < 64; i++)	{
+					if (atc[i].v && !atc[i].g)
+						atc[i].v = 0, didflush++;
+				}
+				break;
+
+			case 3:
+				/* PFLUSHA flush all entries */
+				write_log ("PFLUSHA\n");
+				for (i = 0; i < 64; i++)	{
+					if (atc[i].v)
+						didflush++;
+					atc[i].v = 0;
+				}
+				atc_last_hit = -1;
+				break;
+		}
+		if (didflush)
+			write_log("  -> flushed %d matching entries\n", didflush);
+
+	} else if ((opcode & 0x0FD8) == 0x548) {
+		int write, regno;
+		regno = opcode & 7;
+		write = opcode & 32;
+		write_log ("PTEST%c (A%d) %08x DFC=%d\n", write ? 'W' : 'R', regno, m68k_areg(regs, regno), regs.dfc);
+		mmu_set_mmusr(0);	/* the result is built up from a clean status register */
+		mmu_translate(m68k_areg(regs, regno), regs.dfc, write, m68k_getpc(), sz_byte, 1); 
+		write_log("PTEST result: mmusr %08x\n", regs.mmusr);
+	} else
+		op_illg (opcode);
+}
+
+
diff -urN src-0.8.22/src/newcpu.c src-0.8.22-mmu/src/newcpu.c
--- src-0.8.22/src/newcpu.c	2002-02-25 17:33:08.000000000 +0100
+++ src-0.8.22-mmu/src/newcpu.c	2003-07-25 12:35:24.000000000 +0200
@@ -24,6 +24,14 @@
 #include "savestate.h"
 #include "blitter.h"
 
+#define SANITY_CHECK_ATC	1
+#define MMU_SETJMP_EXCEPTIONS	1
+
+#if MMU_SETJMP_EXCEPTIONS
+jmp_buf m68k_exception;
+#endif
+
+
 /* Opcode of faulting instruction */
 uae_u16 last_op_for_exception_3;
 /* PC at fault time */
@@ -46,7 +54,7 @@
 
 #define COUNT_INSTRS 0
 
-#if COUNT_INSTRS
+#if COUNT_INSTRS /* {{{ */
 static unsigned long int instrcount[65536];
 static uae_u16 opcodenums[65536];
 
@@ -94,7 +102,7 @@
 void dump_counts (void)
 {
 }
-#endif
+#endif /* }}} */
 
 int broken_in;
 
@@ -106,6 +114,7 @@
     return 4;
 }
 
+/* {{{ CPU init/table building */
 static void build_cpufunctbl (void)
 {
     int i;
@@ -176,6 +185,9 @@
 {
     int i;
 
+    memset(&atc, 0, sizeof(atc));
+    regs.mmu_enabled = 0;
+	
     update_68k_cycles ();
 
     for (i = 0 ; i < 256 ; i++) {
@@ -196,7 +208,7 @@
 	fpp_movem_index2[i] = j;
 	fpp_movem_next[i] = i & (~(1 << j));
     }
-#if COUNT_INSTRS
+#if COUNT_INSTRS /* {{{ */
     {
 	FILE *f = fopen (icountfilename (), "r");
 	memset (instrcount, 0, sizeof instrcount);
@@ -211,7 +223,7 @@
 	    fclose(f);
 	}
     }
-#endif
+#endif /* }}} */
     write_log ("Building CPU table for configuration: 68");
     if (currprefs.address_space_24 && currprefs.cpu_level > 1)
         write_log ("EC");
@@ -244,6 +256,7 @@
 
     build_cpufunctbl ();
 }
+/* }}} */
 
 struct regstruct regs, lastint_regs;
 static struct regstruct regs_backup[16];
@@ -251,10 +264,6 @@
 static long int m68kpc_offset;
 int lastint_no;
 
-#define get_ibyte_1(o) get_byte(regs.pc + (regs.pc_p - regs.pc_oldp) + (o) + 1)
-#define get_iword_1(o) get_word(regs.pc + (regs.pc_p - regs.pc_oldp) + (o))
-#define get_ilong_1(o) get_long(regs.pc + (regs.pc_p - regs.pc_oldp) + (o))
-
 uae_s32 ShowEA (FILE *f, int reg, amodes mode, wordsizes size, char *buf)
 {
     uae_u16 dp;
@@ -681,6 +690,45 @@
 	unset_special (SPCFLAG_TRACE);
 }
 
+/* for building exception frames */
+STATIC_INLINE void exc_push_word(uae_u16 w)	/* push one 16-bit value onto the stack addressed by A7 */
+{
+    m68k_areg(regs, 7) -= 2;	/* pre-decrement: the stack grows downwards */
+    put_word(m68k_areg(regs, 7), w);
+}
+STATIC_INLINE void exc_push_long(uae_u32 l)	/* push one 32-bit value onto the stack addressed by A7 */
+{
+    m68k_areg(regs, 7) -= 4;	/* pre-decrement: the stack grows downwards */
+    put_long (m68k_areg(regs, 7), l);
+}
+
+STATIC_INLINE void exc_make_frame(	/* push the common part of an exception stack frame */
+		int format,	/* frame format number; ends up in the top 4 bits of the format/vector word */
+		uae_u16	sr,	/* status register value to store */
+		uae_u32 currpc,	/* program counter value to store */
+		int nr,	/* exception number; stored as nr * 4 */
+		uae_u32 x0,	/* extra longword for formats 2, 3 and 4 */
+		uae_u32 x1	/* second extra longword, format 4 only */
+)
+{
+    switch(format)	{	/* format-specific extras go on first, i.e. at the highest addresses */
+    case 4:
+        exc_push_long(x1);
+	exc_push_long(x0);
+	break;
+    case 3:
+    case 2:
+        exc_push_long(x0);
+	break;
+    }	/* every other format pushes nothing extra here */
+
+    exc_push_word((format << 12) + (nr * 4));	/* format | vector */
+    exc_push_long(currpc);
+    exc_push_word(sr);	/* pushed last, so it sits at the final stack pointer */
+}
+		
+int in_exception_2 = 0;
+
 void Exception(int nr, uaecptr oldpc)
 {
     uae_u32 currpc = m68k_getpc ();
@@ -696,56 +744,107 @@
 	    m68k_areg(regs, 7) = regs.isp;
 	regs.s = 1;
     }
+
+    if (nr == 2 && in_exception_2++)	{
+        write_log("HALT: Double Bus Error means bad news!\n");
+	abort();
+    }
+
     if (currprefs.cpu_level > 0) {
-	if (nr == 2 || nr == 3) {
-	    int i;
-	    /* @@@ this is probably wrong (?) */
-	    for (i = 0 ; i < 12 ; i++) {
-		m68k_areg(regs, 7) -= 2;
-		put_word (m68k_areg(regs, 7), 0);
+        if (nr == 2)	{
+	    write_log("Exception 2!!\n");
+	    if (currprefs.cpu_level == 5)	{
+	        /* 68060 */
+	        exc_make_frame(4,
+			       regs.sr,
+			       currpc,
+			       nr,
+			       regs.mmu_fault_addr, /* fault address */
+			       regs.mmu_fslw /* fault status long-word */
+			       );
 	    }
-	    m68k_areg(regs, 7) -= 2;
-	    put_word (m68k_areg(regs, 7), 0xa000 + nr * 4);
+	    else if (currprefs.cpu_level == 4)	{
+	        /* 68040 */
+	        exc_push_long(0);	/* PD3 */
+		exc_push_long(0);	/* PD2 */
+		exc_push_long(0);	/* PD1 */
+		exc_push_long(0);	/* PD0/WB1D */
+		exc_push_long(0);	/* WB1A */
+		exc_push_long(0);	/* WB2D */
+		exc_push_long(0);	/* WB2A */
+		exc_push_long(0);	/* WB3D */
+		exc_push_long(0);	/* WB3A */
+		exc_push_long(regs.mmu_fault_addr);	
+		exc_push_word(0);	/* WB1S */
+		exc_push_word(0);	/* WB2S */
+		exc_push_word(0);	/* WB3S */
+		exc_push_word(0);	/* WB3S */
+		exc_push_word(regs.mmu_ssw);
+		exc_push_long(regs.mmu_fault_addr);	/* EA */
+		exc_make_frame(7,
+			       regs.sr,
+			       currpc,
+			       2,
+			       0,
+			       0);
+	    }
+	}
+	else if (nr == 3) {
+	    int i;
+
+	    exc_make_frame(2,
+			   regs.sr,
+			   last_addr_for_exception_3,
+			   nr,
+			   last_fault_for_exception_3 & 0xfffffffe,	
+			   0
+			   );
+	
 	} else if (nr ==5 || nr == 6 || nr == 7 || nr == 9) {
-	    m68k_areg(regs, 7) -= 4;
-	    put_long (m68k_areg(regs, 7), oldpc);
-	    m68k_areg(regs, 7) -= 2;
-	    put_word (m68k_areg(regs, 7), 0x2000 + nr * 4);
+	    /* div by zero, CHK, TRAP or TRACE */
+	    exc_make_frame(2,
+			   regs.sr,
+			   currpc,
+			   nr,
+			   oldpc,
+			   0
+			   );
 	} else if (regs.m && nr >= 24 && nr < 32) {
-	    m68k_areg(regs, 7) -= 2;
-	    put_word (m68k_areg(regs, 7), nr * 4);
-	    m68k_areg(regs, 7) -= 4;
-	    put_long (m68k_areg(regs, 7), currpc);
-	    m68k_areg(regs, 7) -= 2;
-	    put_word (m68k_areg(regs, 7), regs.sr);
+	    /* interrupts! */
+	    exc_make_frame(0,
+			   regs.sr,
+			   currpc,
+			   nr,
+			   0, 0);
 	    regs.sr |= (1 << 13);
 	    regs.msp = m68k_areg(regs, 7);
 	    m68k_areg(regs, 7) = regs.isp;
-	    m68k_areg(regs, 7) -= 2;
-	    put_word (m68k_areg(regs, 7), 0x1000 + nr * 4);
+			
+	    exc_make_frame(1,	/* throwaway */
+			   regs.sr,
+			   currpc,
+			   nr,
+			   0, 0);
 	} else {
-	    m68k_areg(regs, 7) -= 2;
-	    put_word (m68k_areg(regs, 7), nr * 4);
+	    exc_make_frame(0,
+			   regs.sr,
+			   currpc,
+			   nr,
+			   0, 0);
 	}
     } else {
-	if (nr == 2 || nr == 3) {
-	    m68k_areg(regs, 7) -= 12;
-	    /* ??????? */
-	    if (nr == 3) {
-		put_long (m68k_areg(regs, 7), last_fault_for_exception_3);
-		put_word (m68k_areg(regs, 7)+4, last_op_for_exception_3);
-		put_long (m68k_areg(regs, 7)+8, last_addr_for_exception_3);
-	    }
-	    write_log ("Exception!\n");
-	    goto kludge_me_do;
+        if (nr == 2 || nr == 3) {
+	    write_log ("Exception %d! -- this code needs rewriting - good luck!\n", nr);
+
+	    exc_push_long(last_fault_for_exception_3);
+	    exc_push_word(last_op_for_exception_3);
+	    exc_push_long(last_addr_for_exception_3);
 	}
+	exc_push_word(regs.sr);
     }
-    m68k_areg(regs, 7) -= 4;
-    put_long (m68k_areg(regs, 7), currpc);
-kludge_me_do:
-    m68k_areg(regs, 7) -= 2;
-    put_word (m68k_areg(regs, 7), regs.sr);
     m68k_setpc (get_long (regs.vbr + 4*nr));
+    if (nr < 24 && nr >= 32)
+        write_log("EXCEPTION: %02d handler @ %lx SP=%lx\n", nr, m68k_getpc(), m68k_areg(regs, 7)); 
     fill_prefetch_0 ();
     regs.t1 = regs.t0 = regs.m = 0;
     unset_special (SPCFLAG_TRACE | SPCFLAG_DOTRACE);
@@ -762,10 +861,10 @@
     set_special (SPCFLAG_INT);
 }
 
-static uae_u32 caar, cacr, itt0, itt1, dtt0, dtt1, tc, mmusr, urp, srp;
 
 int m68k_move2c (int regno, uae_u32 *regp)
 {
+    /* 0x808 is the PCR on an '060 */
     if ((currprefs.cpu_level == 1 && (regno & 0x7FF) > 1)
 	|| (currprefs.cpu_level < 4 && (regno & 0x7FF) > 2)
 	|| (currprefs.cpu_level == 4 && regno == 0x802))
@@ -774,25 +873,24 @@
 	return 0;
     } else {
 	switch (regno) {
-	case 0: regs.sfc = *regp & 7; break;
-	case 1: regs.dfc = *regp & 7; break;
-	case 2: cacr = *regp & (currprefs.cpu_level < 4 ? 0x3 : 0x80008000); break;
-	case 3: tc = *regp & 0xc000; break;
+	case 0: regs.sfc = *regp & 7; /*write_log("SFC set to %d\n", regs.sfc);*/ break;
+	case 1: regs.dfc = *regp & 7; /*write_log("DFC set to %d\n", regs.dfc);*/ break;
+	case 2: regs.cacr = *regp & (currprefs.cpu_level < 4 ? 0x3 : 0x80008000); break;
+	case 3: mmu_set_tc(*regp & 0xc000); break;
 	    /* Mask out fields that should be zero.  */
-	case 4: itt0 = *regp & 0xffffe364; break;
-	case 5: itt1 = *regp & 0xffffe364; break;
-	case 6: dtt0 = *regp & 0xffffe364; break;
-	case 7: dtt1 = *regp & 0xffffe364; break;
+	case 4: case 5: case 6: case 7:
+	    mmu_set_ttr(regno, *regp & 0xffffe364); break;
 	  
 	case 0x800: regs.usp = *regp; break;
 	case 0x801: regs.vbr = *regp; break;
-	case 0x802: caar = *regp & 0xfc; break;
+	case 0x802: regs.caar = *regp & 0xfc; break;
 	case 0x803: regs.msp = *regp; if (regs.m == 1) m68k_areg(regs, 7) = regs.msp; break;
 	case 0x804: regs.isp = *regp; if (regs.m == 0) m68k_areg(regs, 7) = regs.isp; break;
-	case 0x805: mmusr = *regp; break;
-	case 0x806: urp = *regp; break;
-	case 0x807: srp = *regp; break;
+	case 0x805: mmu_set_mmusr(*regp); break;
+	case 0x806: case 0x807:
+	    mmu_set_root_pointer(regno, *regp); break;
 	default:
+	    write_log("move2x cpu=%d regno=%lx val=%lx\n", currprefs.cpu_level, regno, *regp);
 	    op_illg (0x4E7B);
 	    return 0;
 	}
@@ -802,31 +900,34 @@
 
 int m68k_movec2 (int regno, uae_u32 *regp)
 {
+    /* 0x808 is the PCR on an '060 */
     if ((currprefs.cpu_level == 1 && (regno & 0x7FF) > 1)
 	|| (currprefs.cpu_level < 4 && (regno & 0x7FF) > 2)
 	|| (currprefs.cpu_level == 4 && regno == 0x802))
     {
+		write_log("movec2 cpu=%d regno=%lx\n", currprefs.cpu_level, regno);
 	op_illg (0x4E7A);
 	return 0;
     } else {
 	switch (regno) {
 	case 0: *regp = regs.sfc; break;
 	case 1: *regp = regs.dfc; break;
-	case 2: *regp = cacr; break;
-	case 3: *regp = tc; break;
-	case 4: *regp = itt0; break;
-	case 5: *regp = itt1; break;
-	case 6: *regp = dtt0; break;
-	case 7: *regp = dtt1; break;
+	case 2: *regp = regs.cacr; break;
+	case 3: *regp = regs.tc; break;
+	case 4: *regp = regs.itt0; break;
+	case 5: *regp = regs.itt1; break;
+	case 6: *regp = regs.dtt0; break;
+	case 7: *regp = regs.dtt1; break;
 	case 0x800: *regp = regs.usp; break;
 	case 0x801: *regp = regs.vbr; break;
-	case 0x802: *regp = caar; break;
+	case 0x802: *regp = regs.caar; break;
 	case 0x803: *regp = regs.m == 1 ? m68k_areg(regs, 7) : regs.msp; break;
 	case 0x804: *regp = regs.m == 0 ? m68k_areg(regs, 7) : regs.isp; break;
-	case 0x805: *regp = mmusr; break;
-	case 0x806: *regp = urp; break;
-	case 0x807: *regp = srp; break;
+	case 0x805: *regp = regs.mmusr; break;
+	case 0x806: *regp = regs.urp; break;
+	case 0x807: *regp = regs.srp; break;
 	default:
+	    write_log("movec2 cpu=%d regno=%lx\n", currprefs.cpu_level, regno);
 	    op_illg (0x4E7A);
 	    return 0;
 	}
@@ -1091,6 +1192,8 @@
 
 void m68k_reset (void)
 {
+    write_log("M68K: RESET!\n");
+    mmu_set_tc(regs.tc & ~0x8000); /* disable mmu */
     regs.kick_mask = 0x00F80000;
     regs.spcflags = 0;
     if (savestate_state == STATE_RESTORE) {
@@ -1106,8 +1209,8 @@
 	return;
     }
 
-    m68k_areg (regs, 7) = get_long (0x00f80000);
-    m68k_setpc (get_long (0x00f80004));
+    m68k_areg (regs, 7) = phys_get_long (0x00f80000);
+    m68k_setpc (phys_get_long (0x00f80004));
     refill_prefetch (m68k_getpc (), 0);
     fill_prefetch_0 ();
     regs.s = 1;
@@ -1137,7 +1240,7 @@
     }
 
     compiler_flush_jsr_stack ();
-    if (opcode == 0x4E7B && get_long (0x10) == 0 && (pc & 0xF80000) == 0xF80000) {
+    if (opcode == 0x4E7B && phys_get_long (0x10) == 0 && (pc & 0xF80000) == 0xF80000) {
 	write_log ("Your Kickstart requires a 68020 CPU. Giving up.\n");
 	broken_in = 1;
 	set_special (SPCFLAG_BRK);
@@ -1185,19 +1288,6 @@
     return 4;
 }
 
-void mmu_op(uae_u32 opcode, uae_u16 extra)
-{
-    if ((opcode & 0xFE0) == 0x0500) {
-	/* PFLUSH */
-	mmusr = 0;
-	write_log ("PFLUSH\n");
-    } else if ((opcode & 0x0FD8) == 0x548) {
-	/* PTEST */
-	write_log ("PTEST\n");
-    } else
-	op_illg (opcode);
-}
-
 static int n_insns = 0, n_spcinsns = 0;
 
 static uaecptr last_trace_ad = 0;
@@ -1410,6 +1500,10 @@
     reset_frame_rate_hack ();
     update_68k_cycles ();
 
+#if MMU_SETJMP_EXCEPTIONS
+    setjmp(m68k_exception);
+#endif
+
     in_m68k_go++;
     for (;;) {
 	if (quit_program > 0) {
@@ -1557,13 +1651,14 @@
     if (currprefs.cpu_compatible)
 	fprintf (f, "prefetch %08lx\n", (unsigned long)do_get_mem_long(&regs.prefetch));
 
+	fprintf (f, "this PC: %08lx\n", m68k_getpc());
     m68k_disasm (f, m68k_getpc (), nextpc, 1);
     if (nextpc)
 	fprintf (f, "next PC: %08lx\n", *nextpc);
 }
 
 
-/* CPU save/restore code */
+/* {{{ CPU save/restore code */
 
 #define CPUTYPE_EC 1
 #define CPUMODE_HALT 1
@@ -1616,8 +1711,8 @@
 	regs.vbr = restore_u32 ();
     }
     if (model >= 68020) {
-	caar = restore_u32 ();
-	cacr = restore_u32 ();
+	regs.caar = restore_u32 ();
+	regs.cacr = restore_u32 ();
 	regs.msp = restore_u32 ();
     }
     write_log ("CPU %d%s%03d, PC=%08.8X\n",
@@ -1651,10 +1746,13 @@
 	save_u32 (regs.vbr);				/* VBR */
     }
     if(model >= 68020) {
-	save_u32 (caar);				/* CAAR */
-	save_u32 (cacr);				/* CACR */
+	save_u32 (regs.caar);				/* CAAR */
+	save_u32 (regs.cacr);				/* CACR */
 	save_u32 (regs.msp);				/* MSP */
     }
     *len = dst - dstbak;
     return dstbak;
 }
+
+/* }}} */
+
diff -urN src-0.8.22/src/newcpu.c~ src-0.8.22-mmu/src/newcpu.c~
--- src-0.8.22/src/newcpu.c~	1970-01-01 01:00:00.000000000 +0100
+++ src-0.8.22-mmu/src/newcpu.c~	2003-07-25 12:11:11.000000000 +0200
@@ -0,0 +1,1763 @@
+ /*
+  * UAE - The Un*x Amiga Emulator
+  *
+  * MC68000 emulation
+  *
+  * (c) 1995 Bernd Schmidt
+  */
+
+#include "sysconfig.h"
+#include "sysdeps.h"
+
+#include "config.h"
+#include "options.h"
+#include "events.h"
+#include "uae.h"
+#include "memory.h"
+#include "custom.h"
+#include "newcpu.h"
+#include "autoconf.h"
+#include "ersatz.h"
+#include "debug.h"
+#include "compiler.h"
+#include "gui.h"
+#include "savestate.h"
+#include "blitter.h"
+
+#define SANITY_CHECK_ATC		1
+#define MMU_SETJMP_EXCEPTIONS	1
+
+#if MMU_SETJMP_EXCEPTIONS
+jmp_buf m68k_exception;
+#endif
+
+
+/* Opcode of faulting instruction */
+uae_u16 last_op_for_exception_3;
+/* PC at fault time */
+uaecptr last_addr_for_exception_3;
+/* Address that generated the exception */
+uaecptr last_fault_for_exception_3;
+
+int areg_byteinc[] = { 1,1,1,1,1,1,1,2 };
+int imm8_table[] = { 8,1,2,3,4,5,6,7 };
+
+int movem_index1[256];
+int movem_index2[256];
+int movem_next[256];
+
+int fpp_movem_index1[256];
+int fpp_movem_index2[256];
+int fpp_movem_next[256];
+
+cpuop_func *cpufunctbl[65536];
+
+#define COUNT_INSTRS 0
+
+#if COUNT_INSTRS /* {{{ */
+static unsigned long int instrcount[65536];
+static uae_u16 opcodenums[65536];
+
+static int compfn (const void *el1, const void *el2)
+{   /* qsort() comparator, highest count first; returns <0, 0 or >0 as qsort requires (a bare 0/1 is not a valid ordering). */
+    return (instrcount[*(const uae_u16 *)el1] < instrcount[*(const uae_u16 *)el2]) - (instrcount[*(const uae_u16 *)el1] > instrcount[*(const uae_u16 *)el2]);
+}
+
+static char *icountfilename (void)
+{
+    char *name = getenv ("INSNCOUNT");
+    if (name)
+	return name;
+    return COUNT_INSTRS == 2 ? "frequent.68k" : "insncount";
+}
+
+void dump_counts (void)
+{   /* Write the per-opcode execution counts, sorted with compfn, to the file named by icountfilename (). */
+    FILE *f = fopen (icountfilename (), "w");
+    unsigned long int total = 0;	/* summed below, so it has to start at zero */
+    int i;
+    if (!f) { write_log ("Cannot open instruction count file for writing.\n"); return; }
+    write_log ("Writing instruction count file...\n");
+    for (i = 0; i < 65536; i++) {
+	opcodenums[i] = i;
+	total += instrcount[i];
+    }
+    qsort (opcodenums, 65536, sizeof(uae_u16), compfn);
+
+    fprintf (f, "Total: %lu\n", total);
+    for (i=0; i < 65536; i++) {
+	unsigned long int cnt = instrcount[opcodenums[i]];
+	struct instr *dp;
+	struct mnemolookup *lookup;
+	if (!cnt)	/* stop at the first opcode that was never executed */
+	    break;
+	dp = table68k + opcodenums[i];
+	for (lookup = lookuptab;lookup->mnemo != dp->mnemo; lookup++)
+	    ;
+	fprintf (f, "%04x: %lu %s\n", opcodenums[i], cnt, lookup->name);
+    }
+    fclose (f);
+}
+#else
+void dump_counts (void)
+{
+}
+#endif /* }}} */
+
+int broken_in;
+
+static unsigned long op_illg_1 (uae_u32 opcode) REGPARAM;
+
+static unsigned long REGPARAM2 op_illg_1 (uae_u32 opcode)
+{
+    op_illg (opcode);
+    return 4;
+}
+
+/* {{{ CPU init/table building */
+static void build_cpufunctbl (void)
+{
+    int i;
+    unsigned long opcode;
+    struct cputbl *tbl = (currprefs.cpu_level == 4 ? op_smalltbl_0_ff
+			  : currprefs.cpu_level == 3 ? op_smalltbl_1_ff
+			  : currprefs.cpu_level == 2 ? op_smalltbl_2_ff
+			  : currprefs.cpu_level == 1 ? op_smalltbl_3_ff
+			  : ! currprefs.cpu_compatible ? op_smalltbl_4_ff
+			  : op_smalltbl_5_ff);
+
+    write_log ("Building CPU function table (%d %d %d).\n",
+	       currprefs.cpu_level, currprefs.cpu_compatible, currprefs.address_space_24);
+
+    for (opcode = 0; opcode < 65536; opcode++)
+	cpufunctbl[opcode] = op_illg_1;
+    for (i = 0; tbl[i].handler != NULL; i++) {
+	if (! tbl[i].specific)
+	    cpufunctbl[tbl[i].opcode] = tbl[i].handler;
+    }
+    for (opcode = 0; opcode < 65536; opcode++) {
+	cpuop_func *f;
+
+	if (table68k[opcode].mnemo == i_ILLG || table68k[opcode].clev > currprefs.cpu_level)
+	    continue;
+
+	if (table68k[opcode].handler != -1) {
+	    f = cpufunctbl[table68k[opcode].handler];
+	    if (f == op_illg_1)
+		abort();
+	    cpufunctbl[opcode] = f;
+	}
+    }
+    for (i = 0; tbl[i].handler != NULL; i++) {
+	if (tbl[i].specific)
+	    cpufunctbl[tbl[i].opcode] = tbl[i].handler;
+    }
+}
+
+unsigned long cycles_mask, cycles_val;
+
+static void update_68k_cycles (void)
+{
+    /* Derive cycles_mask / cycles_val from the configured m68k_speed:
+     * a speed below 1 selects mask 0xFFFFFFFF with value 0, anything else mask 0 with the speed as value. */
+    const int speed_is_fixed = !(currprefs.m68k_speed < 1);
+
+    cycles_mask = speed_is_fixed ? 0 : 0xFFFFFFFF;
+    cycles_val = speed_is_fixed ? currprefs.m68k_speed : 0;
+}
+
+void check_prefs_changed_cpu (void)
+{
+    if (currprefs.cpu_level != changed_prefs.cpu_level
+	|| currprefs.cpu_compatible != changed_prefs.cpu_compatible) {
+	currprefs.cpu_level = changed_prefs.cpu_level;
+	currprefs.cpu_compatible = changed_prefs.cpu_compatible;
+	build_cpufunctbl ();
+    }
+    if (currprefs.m68k_speed != changed_prefs.m68k_speed) {
+	currprefs.m68k_speed = changed_prefs.m68k_speed;
+	reset_frame_rate_hack ();
+	update_68k_cycles ();
+    }
+}
+
+void init_m68k (void)
+{
+    int i;
+
+	memset(&atc, 0, sizeof(atc));
+	regs.mmu_enabled = 0;
+	
+    update_68k_cycles ();
+
+    for (i = 0 ; i < 256 ; i++) {
+	int j;
+	for (j = 0 ; j < 8 ; j++) {
+		if (i & (1 << j)) break;
+	}
+	movem_index1[i] = j;
+	movem_index2[i] = 7-j;
+	movem_next[i] = i & (~(1 << j));
+    }
+    for (i = 0 ; i < 256 ; i++) {
+	int j;
+	for (j = 7 ; j >= 0 ; j--) {
+		if (i & (1 << j)) break;
+	}
+	fpp_movem_index1[i] = 7-j;
+	fpp_movem_index2[i] = j;
+	fpp_movem_next[i] = i & (~(1 << j));
+    }
+#if COUNT_INSTRS /* {{{ */
+    {
+	FILE *f = fopen (icountfilename (), "r");
+	memset (instrcount, 0, sizeof instrcount);
+	if (f) {
+	    uae_u32 opcode, count, total;
+	    char name[20];
+	    write_log ("Reading instruction count file...\n");
+	    fscanf (f, "Total: %lu\n", &total);
+	    while (fscanf (f, "%lx: %lu %s\n", &opcode, &count, name) == 3) {
+		instrcount[opcode] = count;
+	    }
+	    fclose(f);
+	}
+    }
+#endif /* }}} */
+    write_log ("Building CPU table for configuration: 68");
+    if (currprefs.address_space_24 && currprefs.cpu_level > 1)
+        write_log ("EC");
+    switch (currprefs.cpu_level) {
+    case 1:
+        write_log ("010");
+        break;
+    case 2:
+        write_log ("020");
+        break;
+    case 3:
+        write_log ("020/881");
+        break;
+    case 4:
+        /* Who is going to miss the MMU anyway...? :-)  */
+        write_log ("040");
+        break;
+    default:
+        write_log ("000");
+        break;
+    }
+    if (currprefs.cpu_compatible)
+        write_log (" (compatible mode)");
+    write_log ("\n");
+    
+    read_table68k ();
+    do_merges ();
+
+    write_log ("%d CPU functions\n", nr_cpuop_funcs);
+
+    build_cpufunctbl ();
+}
+/* }}} */
+
+struct regstruct regs, lastint_regs;
+static struct regstruct regs_backup[16];
+static int backup_pointer = 0;
+static long int m68kpc_offset;
+int lastint_no;
+
+uae_s32 ShowEA (FILE *f, int reg, amodes mode, wordsizes size, char *buf)
+{
+    uae_u16 dp;
+    uae_s8 disp8;
+    uae_s16 disp16;
+    int r;
+    uae_u32 dispreg;
+    uaecptr addr;
+    uae_s32 offset = 0;
+    char buffer[80];
+
+    switch (mode){
+     case Dreg:
+	sprintf (buffer,"D%d", reg);
+	break;
+     case Areg:
+	sprintf (buffer,"A%d", reg);
+	break;
+     case Aind:
+	sprintf (buffer,"(A%d)", reg);
+	break;
+     case Aipi:
+	sprintf (buffer,"(A%d)+", reg);
+	break;
+     case Apdi:
+	sprintf (buffer,"-(A%d)", reg);
+	break;
+     case Ad16:
+	disp16 = get_iword_1 (m68kpc_offset); m68kpc_offset += 2;
+	addr = m68k_areg(regs,reg) + (uae_s16)disp16;
+	sprintf (buffer,"(A%d,$%04x) == $%08lx", reg, disp16 & 0xffff,
+					(unsigned long)addr);
+	break;
+     case Ad8r:
+	dp = get_iword_1 (m68kpc_offset); m68kpc_offset += 2;
+	disp8 = dp & 0xFF;
+	r = (dp & 0x7000) >> 12;
+	dispreg = dp & 0x8000 ? m68k_areg(regs,r) : m68k_dreg(regs,r);
+	if (!(dp & 0x800)) dispreg = (uae_s32)(uae_s16)(dispreg);
+	dispreg <<= (dp >> 9) & 3;
+
+	if (dp & 0x100) {
+	    uae_s32 outer = 0, disp = 0;
+	    uae_s32 base = m68k_areg(regs,reg);
+	    char name[10];
+	    sprintf (name,"A%d, ",reg);
+	    if (dp & 0x80) { base = 0; name[0] = 0; }
+	    if (dp & 0x40) dispreg = 0;
+	    if ((dp & 0x30) == 0x20) { disp = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); m68kpc_offset += 2; }
+	    if ((dp & 0x30) == 0x30) { disp = get_ilong_1 (m68kpc_offset); m68kpc_offset += 4; }
+	    base += disp;
+
+	    if ((dp & 0x3) == 0x2) { outer = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); m68kpc_offset += 2; }
+	    if ((dp & 0x3) == 0x3) { outer = get_ilong_1 (m68kpc_offset); m68kpc_offset += 4; }
+
+	    if (!(dp & 4)) base += dispreg;
+	    if (dp & 3) base = get_long (base);
+	    if (dp & 4) base += dispreg;
+
+	    addr = base + outer;
+	    sprintf (buffer,"(%s%c%d.%c*%d+%ld)+%ld == $%08lx", name,
+		    dp & 0x8000 ? 'A' : 'D', (int)r, dp & 0x800 ? 'L' : 'W',
+		    1 << ((dp >> 9) & 3),
+		    disp,outer,
+		    (unsigned long)addr);
+	} else {
+	  addr = m68k_areg(regs,reg) + (uae_s32)((uae_s8)disp8) + dispreg;
+	  sprintf (buffer,"(A%d, %c%d.%c*%d, $%02x) == $%08lx", reg,
+	       dp & 0x8000 ? 'A' : 'D', (int)r, dp & 0x800 ? 'L' : 'W',
+	       1 << ((dp >> 9) & 3), disp8,
+	       (unsigned long)addr);
+	}
+	break;
+     case PC16:
+	addr = m68k_getpc () + m68kpc_offset;
+	disp16 = get_iword_1 (m68kpc_offset); m68kpc_offset += 2;
+	addr += (uae_s16)disp16;
+	sprintf (buffer,"(PC,$%04x) == $%08lx", disp16 & 0xffff,(unsigned long)addr);
+	break;
+     case PC8r:
+	addr = m68k_getpc () + m68kpc_offset;
+	dp = get_iword_1 (m68kpc_offset); m68kpc_offset += 2;
+	disp8 = dp & 0xFF;
+	r = (dp & 0x7000) >> 12;
+	dispreg = dp & 0x8000 ? m68k_areg(regs,r) : m68k_dreg(regs,r);
+	if (!(dp & 0x800)) dispreg = (uae_s32)(uae_s16)(dispreg);
+	dispreg <<= (dp >> 9) & 3;
+
+	if (dp & 0x100) {
+	    uae_s32 outer = 0,disp = 0;
+	    uae_s32 base = addr;
+	    char name[10];
+	    sprintf (name,"PC, ");
+	    if (dp & 0x80) { base = 0; name[0] = 0; }
+	    if (dp & 0x40) dispreg = 0;
+	    if ((dp & 0x30) == 0x20) { disp = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); m68kpc_offset += 2; }
+	    if ((dp & 0x30) == 0x30) { disp = get_ilong_1 (m68kpc_offset); m68kpc_offset += 4; }
+	    base += disp;
+
+	    if ((dp & 0x3) == 0x2) { outer = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); m68kpc_offset += 2; }
+	    if ((dp & 0x3) == 0x3) { outer = get_ilong_1 (m68kpc_offset); m68kpc_offset += 4; }
+
+	    if (!(dp & 4)) base += dispreg;
+	    if (dp & 3) base = get_long (base);
+	    if (dp & 4) base += dispreg;
+
+	    addr = base + outer;
+	    sprintf (buffer,"(%s%c%d.%c*%d+%ld)+%ld == $%08lx", name,
+		    dp & 0x8000 ? 'A' : 'D', (int)r, dp & 0x800 ? 'L' : 'W',
+		    1 << ((dp >> 9) & 3),
+		    disp,outer,
+		    (unsigned long)addr);
+	} else {
+	  addr += (uae_s32)((uae_s8)disp8) + dispreg;
+	  sprintf (buffer,"(PC, %c%d.%c*%d, $%02x) == $%08lx", dp & 0x8000 ? 'A' : 'D',
+		(int)r, dp & 0x800 ? 'L' : 'W',  1 << ((dp >> 9) & 3),
+		disp8, (unsigned long)addr);
+	}
+	break;
+     case absw:
+	sprintf (buffer,"$%08lx", (unsigned long)(uae_s32)(uae_s16)get_iword_1 (m68kpc_offset));
+	m68kpc_offset += 2;
+	break;
+     case absl:
+	sprintf (buffer,"$%08lx", (unsigned long)get_ilong_1 (m68kpc_offset));
+	m68kpc_offset += 4;
+	break;
+     case imm:
+	switch (size){
+	 case sz_byte:
+	    sprintf (buffer,"#$%02x", (unsigned int)(get_iword_1 (m68kpc_offset) & 0xff));
+	    m68kpc_offset += 2;
+	    break;
+	 case sz_word:
+	    sprintf (buffer,"#$%04x", (unsigned int)(get_iword_1 (m68kpc_offset) & 0xffff));
+	    m68kpc_offset += 2;
+	    break;
+	 case sz_long:
+	    sprintf (buffer,"#$%08lx", (unsigned long)(get_ilong_1 (m68kpc_offset)));
+	    m68kpc_offset += 4;
+	    break;
+	 default:
+	    break;
+	}
+	break;
+     case imm0:
+	offset = (uae_s32)(uae_s8)get_iword_1 (m68kpc_offset);
+	m68kpc_offset += 2;
+	sprintf (buffer,"#$%02x", (unsigned int)(offset & 0xff));
+	break;
+     case imm1:
+	offset = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset);
+	m68kpc_offset += 2;
+	sprintf (buffer,"#$%04x", (unsigned int)(offset & 0xffff));
+	break;
+     case imm2:
+	offset = (uae_s32)get_ilong_1 (m68kpc_offset);
+	m68kpc_offset += 4;
+	sprintf (buffer,"#$%08lx", (unsigned long)offset);
+	break;
+     case immi:
+	offset = (uae_s32)(uae_s8)(reg & 0xff);
+	sprintf (buffer,"#$%08lx", (unsigned long)offset);
+	break;
+     default:
+	break;
+    }
+    if (buf == 0)
+	fprintf (f, "%s", buffer);
+    else
+	strcat (buf, buffer);
+    return offset;
+}
+
+/* The plan is that this will take over the job of exception 3 handling -
+ * the CPU emulation functions will just do a longjmp to m68k_go whenever
+ * they hit an odd address. */
+static int verify_ea (int reg, amodes mode, wordsizes size, uae_u32 *val)
+{
+    uae_u16 dp;
+    uae_s8 disp8;
+    uae_s16 disp16;
+    int r;
+    uae_u32 dispreg;
+    uaecptr addr;
+    uae_s32 offset = 0;
+
+    switch (mode){
+     case Dreg:
+	*val = m68k_dreg (regs, reg);
+	return 1;
+     case Areg:
+	*val = m68k_areg (regs, reg);
+	return 1;
+
+     case Aind:
+     case Aipi:
+	addr = m68k_areg (regs, reg);
+	break;
+     case Apdi:
+	addr = m68k_areg (regs, reg);
+	break;
+     case Ad16:
+	disp16 = get_iword_1 (m68kpc_offset); m68kpc_offset += 2;
+	addr = m68k_areg(regs,reg) + (uae_s16)disp16;
+	break;
+     case Ad8r:
+	addr = m68k_areg (regs, reg);
+     d8r_common:
+	dp = get_iword_1 (m68kpc_offset); m68kpc_offset += 2;
+	disp8 = dp & 0xFF;
+	r = (dp & 0x7000) >> 12;
+	dispreg = dp & 0x8000 ? m68k_areg(regs,r) : m68k_dreg(regs,r);
+	if (!(dp & 0x800)) dispreg = (uae_s32)(uae_s16)(dispreg);
+	dispreg <<= (dp >> 9) & 3;
+
+	if (dp & 0x100) {
+	    uae_s32 outer = 0, disp = 0;
+	    uae_s32 base = addr;
+	    if (dp & 0x80) base = 0;
+	    if (dp & 0x40) dispreg = 0;
+	    if ((dp & 0x30) == 0x20) { disp = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); m68kpc_offset += 2; }
+	    if ((dp & 0x30) == 0x30) { disp = get_ilong_1 (m68kpc_offset); m68kpc_offset += 4; }
+	    base += disp;
+
+	    if ((dp & 0x3) == 0x2) { outer = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset); m68kpc_offset += 2; }
+	    if ((dp & 0x3) == 0x3) { outer = get_ilong_1 (m68kpc_offset); m68kpc_offset += 4; }
+
+	    if (!(dp & 4)) base += dispreg;
+	    if (dp & 3) base = get_long (base);
+	    if (dp & 4) base += dispreg;
+
+	    addr = base + outer;
+	} else {
+	  addr += (uae_s32)((uae_s8)disp8) + dispreg;
+	}
+	break;
+     case PC16:
+	addr = m68k_getpc () + m68kpc_offset;
+	disp16 = get_iword_1 (m68kpc_offset); m68kpc_offset += 2;
+	addr += (uae_s16)disp16;
+	break;
+     case PC8r:
+	addr = m68k_getpc () + m68kpc_offset;
+	goto d8r_common;
+     case absw:
+	addr = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset);
+	m68kpc_offset += 2;
+	break;
+     case absl:
+	addr = get_ilong_1 (m68kpc_offset);
+	m68kpc_offset += 4;
+	break;
+     case imm:
+	switch (size){
+	 case sz_byte:
+	    *val = get_iword_1 (m68kpc_offset) & 0xff;
+	    m68kpc_offset += 2;
+	    break;
+	 case sz_word:
+	    *val = get_iword_1 (m68kpc_offset) & 0xffff;
+	    m68kpc_offset += 2;
+	    break;
+	 case sz_long:
+	    *val = get_ilong_1 (m68kpc_offset);
+	    m68kpc_offset += 4;
+	    break;
+	 default:
+	    break;
+	}
+	return 1;
+     case imm0:
+	*val = (uae_s32)(uae_s8)get_iword_1 (m68kpc_offset);
+	m68kpc_offset += 2;
+	return 1;
+     case imm1:
+	*val = (uae_s32)(uae_s16)get_iword_1 (m68kpc_offset);
+	m68kpc_offset += 2;
+	return 1;
+     case imm2:
+	*val = get_ilong_1 (m68kpc_offset);
+	m68kpc_offset += 4;
+	return 1;
+     case immi:
+	*val = (uae_s32)(uae_s8)(reg & 0xff);
+	return 1;
+     default:
+	addr = 0;
+	break;
+    }
+    if ((addr & 1) == 0)
+	return 1;
+
+    last_addr_for_exception_3 = m68k_getpc () + m68kpc_offset;
+    last_fault_for_exception_3 = addr;
+    return 0;
+}
+
+uae_u32 get_disp_ea_020 (uae_u32 base, uae_u32 dp)
+{
+    int reg = (dp >> 12) & 15;
+    uae_s32 regd = regs.regs[reg];
+    if ((dp & 0x800) == 0)
+	regd = (uae_s32)(uae_s16)regd;
+    regd <<= (dp >> 9) & 3;
+    if (dp & 0x100) {
+	uae_s32 outer = 0;
+	if (dp & 0x80) base = 0;
+	if (dp & 0x40) regd = 0;
+
+	if ((dp & 0x30) == 0x20) base += (uae_s32)(uae_s16)next_iword();
+	if ((dp & 0x30) == 0x30) base += next_ilong();
+
+	if ((dp & 0x3) == 0x2) outer = (uae_s32)(uae_s16)next_iword();
+	if ((dp & 0x3) == 0x3) outer = next_ilong();
+
+	if ((dp & 0x4) == 0) base += regd;
+	if (dp & 0x3) base = get_long (base);
+	if (dp & 0x4) base += regd;
+
+	return base + outer;
+    } else {
+	return base + (uae_s32)((uae_s8)dp) + regd;
+    }
+}
+
+uae_u32 get_disp_ea_000 (uae_u32 base, uae_u32 dp)
+{
+    int reg = (dp >> 12) & 15;
+    uae_s32 regd = regs.regs[reg];
+#if 1
+    if ((dp & 0x800) == 0)
+	regd = (uae_s32)(uae_s16)regd;
+    return base + (uae_s8)dp + regd;
+#else
+    /* Branch-free code... benchmark this again now that
+     * things are no longer inline.  */
+    uae_s32 regd16;
+    uae_u32 mask;
+    mask = ((dp & 0x800) >> 11) - 1;
+    regd16 = (uae_s32)(uae_s16)regd;
+    regd16 &= mask;
+    mask = ~mask;
+    base += (uae_s8)dp;
+    regd &= mask;
+    regd |= regd16;
+    return base + regd;
+#endif
+}
+
+void MakeSR (void)
+{
+#if 0
+    assert((regs.t1 & 1) == regs.t1);
+    assert((regs.t0 & 1) == regs.t0);
+    assert((regs.s & 1) == regs.s);
+    assert((regs.m & 1) == regs.m);
+    assert((XFLG & 1) == XFLG);
+    assert((NFLG & 1) == NFLG);
+    assert((ZFLG & 1) == ZFLG);
+    assert((VFLG & 1) == VFLG);
+    assert((CFLG & 1) == CFLG);
+#endif
+    regs.sr = ((regs.t1 << 15) | (regs.t0 << 14)
+	       | (regs.s << 13) | (regs.m << 12) | (regs.intmask << 8)
+	       | (GET_XFLG << 4) | (GET_NFLG << 3) | (GET_ZFLG << 2) | (GET_VFLG << 1)
+	       | GET_CFLG);
+}
+
+void MakeFromSR (void)
+{
+    int oldm = regs.m;
+    int olds = regs.s;
+
+    regs.t1 = (regs.sr >> 15) & 1;
+    regs.t0 = (regs.sr >> 14) & 1;
+    regs.s = (regs.sr >> 13) & 1;
+    regs.m = (regs.sr >> 12) & 1;
+    regs.intmask = (regs.sr >> 8) & 7;
+    SET_XFLG ((regs.sr >> 4) & 1);
+    SET_NFLG ((regs.sr >> 3) & 1);
+    SET_ZFLG ((regs.sr >> 2) & 1);
+    SET_VFLG ((regs.sr >> 1) & 1);
+    SET_CFLG (regs.sr & 1);
+    if (currprefs.cpu_level >= 2) {
+	if (olds != regs.s) {
+	    if (olds) {
+		if (oldm)
+		    regs.msp = m68k_areg(regs, 7);
+		else
+		    regs.isp = m68k_areg(regs, 7);
+		m68k_areg(regs, 7) = regs.usp;
+	    } else {
+		regs.usp = m68k_areg(regs, 7);
+		m68k_areg(regs, 7) = regs.m ? regs.msp : regs.isp;
+	    }
+	} else if (olds && oldm != regs.m) {
+	    if (oldm) {
+		regs.msp = m68k_areg(regs, 7);
+		m68k_areg(regs, 7) = regs.isp;
+	    } else {
+		regs.isp = m68k_areg(regs, 7);
+		m68k_areg(regs, 7) = regs.msp;
+	    }
+	}
+    } else {
+	if (olds != regs.s) {
+	    if (olds) {
+		regs.isp = m68k_areg(regs, 7);
+		m68k_areg(regs, 7) = regs.usp;
+	    } else {
+		regs.usp = m68k_areg(regs, 7);
+		m68k_areg(regs, 7) = regs.isp;
+	    }
+	}
+    }
+
+    set_special (SPCFLAG_INT);
+    if (regs.t1 || regs.t0)
+	set_special (SPCFLAG_TRACE);
+    else
+    	/* Keep SPCFLAG_DOTRACE, we still want a trace exception for
+	   SR-modifying instructions (including STOP).  */
+	unset_special (SPCFLAG_TRACE);
+}
+
+/* for building exception frames */
+STATIC_INLINE void exc_push_word(uae_u16 w)
+{	/* Push the 16-bit value w: A7 is lowered by 2 first, then w is stored at the new A7. */
+	m68k_areg(regs, 7) -= 2;
+	put_word(m68k_areg(regs, 7), w);
+}
+STATIC_INLINE void exc_push_long(uae_u32 l)
+{	/* Push the 32-bit value l: A7 is lowered by 4 first, then l is stored at the new A7. */
+	m68k_areg(regs, 7) -= 4;
+	put_long (m68k_areg(regs, 7), l);
+}
+
+STATIC_INLINE void exc_make_frame(
+		int format,	/* frame format number, placed in bits 12 and up of the format/vector word */
+		uae_u16	sr,	/* status register value; pushed last */
+		uae_u32 currpc,	/* program counter value stored in the frame */
+		int nr,	/* vector number; stored as nr * 4 in the format/vector word */
+		uae_u32 x0,	/* extra long word pushed for formats 2, 3 and 4 */
+		uae_u32 x1	/* second extra long word, pushed (before x0) for format 4 only */
+)
+{
+	switch(format)	{
+		case 4:
+			exc_push_long(x1);
+			exc_push_long(x0);
+			break;
+		case 3:
+		case 2:
+			exc_push_long(x0);
+			break;
+	}	/* any other format pushes no extra long words here */
+	/* Common part of every frame: format/vector word, then PC, then SR. */
+	exc_push_word((format << 12) + (nr * 4));	/* format | vector */
+	exc_push_long(currpc);
+	exc_push_word(sr);
+}
+		
+int in_exception_2 = 0;
+
+/* Enter exception NR: switch to the supervisor stack if necessary, push the stack
+ * frame for the configured CPU level, then continue at the handler address read
+ * from vector NR.  OLDPC is stored in the format-2 frames of vectors 5, 6, 7 and 9. */
+void Exception(int nr, uaecptr oldpc)
+{
+    uae_u32 currpc = m68k_getpc ();
+
+    compiler_flush_jsr_stack();
+    MakeSR();
+
+    if (!regs.s) {
+	regs.usp = m68k_areg(regs, 7);
+	if (currprefs.cpu_level >= 2)
+	    m68k_areg(regs, 7) = regs.m ? regs.msp : regs.isp;
+	else
+	    m68k_areg(regs, 7) = regs.isp;
+	regs.s = 1;
+    }
+
+	if (nr == 2 && in_exception_2++)	{
+		write_log("HALT: Double Bus Error means bad news!\n");
+		abort();
+	}
+
+    if (currprefs.cpu_level > 0) {
+		if (nr == 2)	{
+			write_log("Exception 2!!\n");
+			if (currprefs.cpu_level == 5)	{
+				/* 68060 */
+				exc_make_frame(4,
+						regs.sr,
+						currpc,
+						nr,
+						regs.mmu_fault_addr, /* fault address */
+						regs.mmu_fslw /* fault status long-word */
+						);
+			}
+			else if (currprefs.cpu_level == 4)	{
+				/* 68040: format 7 access error frame, 60 bytes in total */
+				exc_push_long(0);	/* PD3 */
+				exc_push_long(0);	/* PD2 */
+				exc_push_long(0);	/* PD1 */
+				exc_push_long(0);	/* PD0/WB1D */
+				exc_push_long(0);	/* WB1A */
+				exc_push_long(0);	/* WB2D */
+				exc_push_long(0);	/* WB2A */
+				exc_push_long(0);	/* WB3D */
+				exc_push_long(0);	/* WB3A */
+				exc_push_long(regs.mmu_fault_addr);	/* FA */
+				exc_push_word(0);	/* WB1S */
+				exc_push_word(0);	/* WB2S */
+				exc_push_word(0);	/* WB3S -- the frame has exactly three write-back status words */
+				exc_push_word(regs.mmu_ssw);
+				exc_push_long(regs.mmu_fault_addr);	/* EA */
+				exc_make_frame(7,
+					regs.sr,
+					currpc,
+					2,
+					0,
+					0);
+	    }
+		}
+		else if (nr == 3) {
+			exc_make_frame(2,
+				regs.sr,
+				last_addr_for_exception_3,
+				nr,
+				last_fault_for_exception_3 & 0xfffffffe,	
+				0
+			);
+	
+	} else if (nr ==5 || nr == 6 || nr == 7 || nr == 9) {
+			/* div by zero, CHK, TRAP or TRACE */
+			exc_make_frame(2,
+				regs.sr,
+				currpc,
+				nr,
+				oldpc,
+				0
+			);
+	} else if (regs.m && nr >= 24 && nr < 32) {
+			/* interrupts! */
+			exc_make_frame(0,
+					regs.sr,
+					currpc,
+					nr,
+					0, 0);
+	    regs.sr |= (1 << 13);
+	    regs.msp = m68k_areg(regs, 7);
+	    m68k_areg(regs, 7) = regs.isp;
+			
+			exc_make_frame(1,	/* throwaway */
+					regs.sr,
+					currpc,
+					nr,
+					0, 0);
+	} else {
+			exc_make_frame(0,
+					regs.sr,
+					currpc,
+					nr,
+					0, 0);
+	}
+    } else {
+	if (nr == 2 || nr == 3) {
+			write_log ("Exception %d! -- this code needs rewriting - good luck!\n", nr);
+			exc_push_long(last_fault_for_exception_3);
+			exc_push_word(last_op_for_exception_3);
+			exc_push_long(last_addr_for_exception_3);
+	} else {
+			/* plain 68000 frame: the return PC goes below the SR pushed next */
+			exc_push_long(currpc);
+	}
+		exc_push_word(regs.sr);
+    }
+    m68k_setpc (get_long (regs.vbr + 4*nr));
+	if (nr < 24 || nr >= 32)	/* skip vectors 24..31 (interrupts); "&&" here could never be true */
+		write_log("EXCEPTION: %02d handler @ %lx SP=%lx\n", nr, (unsigned long)m68k_getpc(), (unsigned long)m68k_areg(regs, 7));
+    fill_prefetch_0 ();
+    regs.t1 = regs.t0 = regs.m = 0;
+    unset_special (SPCFLAG_TRACE | SPCFLAG_DOTRACE);
+}
+
+static void Interrupt (int nr)
+{   /* Take the interrupt of level nr through Exception () and update the interrupt mask. */
+    assert(nr < 8 && nr >= 0);
+    lastint_regs = regs;	/* copy of the registers as they were before the exception is entered */
+    lastint_no = nr;
+    Exception(nr+24, 0);	/* level nr uses vector nr+24 */
+    /* Set the interrupt mask to the level just taken and flag SPCFLAG_INT. */
+    regs.intmask = nr;
+    set_special (SPCFLAG_INT);
+}
+
+
+/* Store *REGP into control register REGNO.  Returns 1 on success, 0 after op_illg () was raised for a rejected register. */
+int m68k_move2c (int regno, uae_u32 *regp)
+{
+	/* 0x808 is the PCR on an '060 */
+    if ((currprefs.cpu_level == 1 && (regno & 0x7FF) > 1)
+	|| (currprefs.cpu_level < 4 && (regno & 0x7FF) > 2)
+	|| (currprefs.cpu_level == 4 && regno == 0x802))
+    {
+	op_illg (0x4E7B);
+	return 0;
+    } else {
+	switch (regno) {
+	case 0: regs.sfc = *regp & 7; /*write_log("SFC set to %d\n", regs.sfc);*/ break;
+	case 1: regs.dfc = *regp & 7; /*write_log("DFC set to %d\n", regs.dfc);*/ break;
+	case 2: regs.cacr = *regp & (currprefs.cpu_level < 4 ? 0x3 : 0x80008000); break;
+	case 3: mmu_set_tc(*regp & 0xc000); break;
+	    /* Mask out fields that should be zero.  */
+	case 4: case 5: case 6: case 7:
+			mmu_set_ttr(regno, *regp & 0xffffe364); break;
+	case 0x800: regs.usp = *regp; break;
+	case 0x801: regs.vbr = *regp; break;
+	case 0x802: regs.caar = *regp & 0xfc; break;
+	case 0x803: regs.msp = *regp; if (regs.m == 1) m68k_areg(regs, 7) = regs.msp; break;
+	case 0x804: regs.isp = *regp; if (regs.m == 0) m68k_areg(regs, 7) = regs.isp; break;
+	case 0x805: mmu_set_mmusr(*regp); break;
+	case 0x806: case 0x807:
+				mmu_set_root_pointer(regno, *regp); break;
+	default:
+		/* regno is an int and *regp a 32-bit value: print them with matching conversions */
+		write_log("move2x cpu=%d regno=%x val=%lx\n", currprefs.cpu_level, (unsigned int)regno, (unsigned long)*regp);
+	    op_illg (0x4E7B);
+	    return 0;
+	}
+    }
+    return 1;
+}
+
+int m68k_movec2 (int regno, uae_u32 *regp)
+{
+	/* Read control register REGNO into *REGP; returns 1, or 0 after op_illg ().  0x808 is the PCR on an '060 */
+    if ((currprefs.cpu_level == 1 && (regno & 0x7FF) > 1)
+	|| (currprefs.cpu_level < 4 && (regno & 0x7FF) > 2)
+	|| (currprefs.cpu_level == 4 && regno == 0x802))
+    {
+		write_log("movec2 cpu=%d regno=%x\n", currprefs.cpu_level, (unsigned int)regno);	/* regno is an int, not a long */
+	op_illg (0x4E7A);
+	return 0;
+    } else {
+	switch (regno) {
+	case 0: *regp = regs.sfc; break;
+	case 1: *regp = regs.dfc; break;
+	case 2: *regp = regs.cacr; break;
+	case 3: *regp = regs.tc; break;
+	case 4: *regp = regs.itt0; break;
+	case 5: *regp = regs.itt1; break;
+	case 6: *regp = regs.dtt0; break;
+	case 7: *regp = regs.dtt1; break;
+	case 0x800: *regp = regs.usp; break;
+	case 0x801: *regp = regs.vbr; break;
+	case 0x802: *regp = regs.caar; break;
+	case 0x803: *regp = regs.m == 1 ? m68k_areg(regs, 7) : regs.msp; break;
+	case 0x804: *regp = regs.m == 0 ? m68k_areg(regs, 7) : regs.isp; break;
+	case 0x805: *regp = regs.mmusr; break;
+	case 0x806: *regp = regs.urp; break;
+	case 0x807: *regp = regs.srp; break;
+	default:
+		write_log("movec2 cpu=%d regno=%x\n", currprefs.cpu_level, (unsigned int)regno);
+	    op_illg (0x4E7A);
+	    return 0;
+	}
+    }
+    return 1;
+}
+
+STATIC_INLINE int
+div_unsigned(uae_u32 src_hi, uae_u32 src_lo, uae_u32 div, uae_u32 *quot, uae_u32 *rem)
+{
+	uae_u32 q = 0, cbit = 0;
+	int i;
+	/* Shift-and-subtract division of src_hi:src_lo by div; returns 1 (outputs untouched) when div <= src_hi, else 0. */
+	if (div <= src_hi) {
+	    return 1;
+	}
+	for (i = 0 ; i < 32 ; i++) {
+		cbit = src_hi & 0x80000000ul;	/* bit about to be shifted out of src_hi */
+		src_hi <<= 1;
+		if (src_lo & 0x80000000ul) src_hi++;	/* carry the top bit of src_lo into src_hi */
+		src_lo <<= 1;
+		q = q << 1;
+		if (cbit || div <= src_hi) {
+			q |= 1;
+			src_hi -= div;
+		}
+	}
+	*quot = q;
+	*rem = src_hi;	/* what is left in src_hi after 32 steps */
+	return 0;
+}
+
+/* Long division: bit 11 of EXTRA selects the signed variant, bit 10 a 64-bit dividend; a zero divisor raises exception 5, overflow sets V. */
+void m68k_divl (uae_u32 opcode, uae_u32 src, uae_u16 extra, uaecptr oldpc)
+{
+#if defined(uae_s64)
+    if (src == 0) {
+	Exception (5, oldpc);
+	return;
+    }
+    if (extra & 0x800) {
+	/* signed variant */
+	uae_s64 a = (uae_s64)(uae_s32)m68k_dreg(regs, (extra >> 12) & 7);
+	uae_s64 quot, rem;
+
+	if (extra & 0x400) {
+	    a &= 0xffffffffu;
+	    a |= (uae_s64)m68k_dreg(regs, extra & 7) << 32;
+	}
+	rem = a % (uae_s64)(uae_s32)src;
+	quot = a / (uae_s64)(uae_s32)src;
+	if ((quot & UVAL64(0xffffffff80000000)) != 0
+	    && (quot & UVAL64(0xffffffff80000000)) != UVAL64(0xffffffff80000000))
+	{
+	    SET_VFLG (1);
+	    SET_NFLG (1);
+	    SET_CFLG (0);
+	} else {
+	    if (((uae_s32)rem < 0) != ((uae_s64)a < 0)) rem = -rem;
+	    SET_VFLG (0);
+	    SET_CFLG (0);
+	    SET_ZFLG (((uae_s32)quot) == 0);
+	    SET_NFLG (((uae_s32)quot) < 0);
+	    m68k_dreg(regs, extra & 7) = rem;
+	    m68k_dreg(regs, (extra >> 12) & 7) = quot;
+	}
+    } else {
+	/* unsigned */
+	uae_u64 a = (uae_u64)(uae_u32)m68k_dreg(regs, (extra >> 12) & 7);
+	uae_u64 quot, rem;
+
+	if (extra & 0x400) {
+	    a &= 0xffffffffu;
+	    a |= (uae_u64)m68k_dreg(regs, extra & 7) << 32;
+	}
+	rem = a % (uae_u64)src;
+	quot = a / (uae_u64)src;
+	if (quot > 0xffffffffu) {
+	    SET_VFLG (1);
+	    SET_NFLG (1);
+	    SET_CFLG (0);
+	} else {
+	    SET_VFLG (0);
+	    SET_CFLG (0);
+	    SET_ZFLG (((uae_s32)quot) == 0);
+	    SET_NFLG (((uae_s32)quot) < 0);
+	    m68k_dreg(regs, extra & 7) = rem;
+	    m68k_dreg(regs, (extra >> 12) & 7) = quot;
+	}
+    }
+#else
+    if (src == 0) {
+	Exception (5, oldpc);
+	return;
+    }
+    if (extra & 0x800) {
+	/* signed variant */
+	uae_s32 lo = (uae_s32)m68k_dreg(regs, (extra >> 12) & 7);
+	uae_s32 hi = lo < 0 ? -1 : 0;
+	uae_s32 save_high;
+	uae_u32 quot, rem;
+	uae_u32 sign;
+	if (extra & 0x400) {
+	    hi = (uae_s32)m68k_dreg(regs, extra & 7);
+	}
+	save_high = hi;
+	sign = (hi ^ src);
+	if (hi < 0) {
+	    hi = ~hi;
+	    lo = -lo;
+	    if (lo == 0) hi++;
+	}
+	if ((uae_s32)src < 0) src = -src;
+	if (div_unsigned(hi, lo, src, &quot, &rem) ||	/* the parentheses below keep quot unread when this reports overflow */
+	    ((sign & 0x80000000) ? quot > 0x80000000 : quot > 0x7fffffff)) {
+	    SET_VFLG (1);
+	    SET_NFLG (1);
+	    SET_CFLG (0);
+	} else {
+	    if (sign & 0x80000000) quot = -quot;
+	    if (((uae_s32)rem < 0) != (save_high < 0)) rem = -rem;
+	    SET_VFLG (0);
+	    SET_CFLG (0);
+	    SET_ZFLG (((uae_s32)quot) == 0);
+	    SET_NFLG (((uae_s32)quot) < 0);
+	    m68k_dreg(regs, extra & 7) = rem;
+	    m68k_dreg(regs, (extra >> 12) & 7) = quot;
+	}
+    } else {
+	/* unsigned */
+	uae_u32 lo = (uae_u32)m68k_dreg(regs, (extra >> 12) & 7);
+	uae_u32 hi = 0;
+	uae_u32 quot, rem;
+
+	if (extra & 0x400) {
+	    hi = (uae_u32)m68k_dreg(regs, extra & 7);
+	}
+	if (div_unsigned(hi, lo, src, &quot, &rem)) {
+	    SET_VFLG (1);
+	    SET_NFLG (1);
+	    SET_CFLG (0);
+	} else {
+	    SET_VFLG (0);
+	    SET_CFLG (0);
+	    SET_ZFLG (((uae_s32)quot) == 0);
+	    SET_NFLG (((uae_s32)quot) < 0);
+	    m68k_dreg(regs, extra & 7) = rem;
+	    m68k_dreg(regs, (extra >> 12) & 7) = quot;
+	}
+    }
+#endif
+}
+
+STATIC_INLINE void
+mul_unsigned(uae_u32 src1, uae_u32 src2, uae_u32 *dst_hi, uae_u32 *dst_lo)
+{
+	uae_u32 r0 = (src1 & 0xffff) * (src2 & 0xffff);	/* low half * low half */
+	uae_u32 r1 = ((src1 >> 16) & 0xffff) * (src2 & 0xffff);	/* high half of src1 * low half of src2 */
+	uae_u32 r2 = (src1 & 0xffff) * ((src2 >> 16) & 0xffff);	/* low half of src1 * high half of src2 */
+	uae_u32 r3 = ((src1 >> 16) & 0xffff) * ((src2 >> 16) & 0xffff);	/* high half * high half */
+	uae_u32 lo;
+	/* Combine the four 16x16 partial products into *dst_hi:*dst_lo using 32-bit arithmetic only. */
+	lo = r0 + ((r1 << 16) & 0xffff0000ul);
+	if (lo < r0) r3++;	/* the addition wrapped: carry into the high word */
+	r0 = lo;
+	lo = r0 + ((r2 << 16) & 0xffff0000ul);
+	if (lo < r0) r3++;	/* carry from the second addition */
+	r3 += ((r1 >> 16) & 0xffff) + ((r2 >> 16) & 0xffff);	/* upper halves of the cross products */
+	*dst_lo = lo;
+	*dst_hi = r3;
+}
+
+void m68k_mull (uae_u32 opcode, uae_u32 src, uae_u16 extra)
+{
+#if defined(uae_s64)
+    if (extra & 0x800) {
+	/* signed variant */
+	uae_s64 a = (uae_s64)(uae_s32)m68k_dreg(regs, (extra >> 12) & 7);
+
+	a *= (uae_s64)(uae_s32)src;
+	SET_VFLG (0);
+	SET_CFLG (0);
+	SET_ZFLG (a == 0);
+	SET_NFLG (a < 0);
+	if (extra & 0x400)
+	    m68k_dreg(regs, extra & 7) = a >> 32;
+	else if ((a & UVAL64(0xffffffff80000000)) != 0
+		 && (a & UVAL64(0xffffffff80000000)) != UVAL64(0xffffffff80000000))
+	{
+	    SET_VFLG (1);
+	}
+	m68k_dreg(regs, (extra >> 12) & 7) = (uae_u32)a;
+    } else {
+	/* unsigned */
+	uae_u64 a = (uae_u64)(uae_u32)m68k_dreg(regs, (extra >> 12) & 7);
+
+	a *= (uae_u64)src;
+	SET_VFLG (0);
+	SET_CFLG (0);
+	SET_ZFLG (a == 0);
+	SET_NFLG (((uae_s64)a) < 0);
+	if (extra & 0x400)
+	    m68k_dreg(regs, extra & 7) = a >> 32;
+	else if ((a & UVAL64(0xffffffff00000000)) != 0) {
+	    SET_VFLG (1);
+	}
+	m68k_dreg(regs, (extra >> 12) & 7) = (uae_u32)a;
+    }
+#else
+    if (extra & 0x800) {
+	/* signed variant */
+	uae_s32 src1,src2;
+	uae_u32 dst_lo,dst_hi;
+	uae_u32 sign;
+
+	src1 = (uae_s32)src;
+	src2 = (uae_s32)m68k_dreg(regs, (extra >> 12) & 7);
+	sign = (src1 ^ src2);
+	if (src1 < 0) src1 = -src1;
+	if (src2 < 0) src2 = -src2;
+	mul_unsigned((uae_u32)src1,(uae_u32)src2,&dst_hi,&dst_lo);
+	if (sign & 0x80000000) {
+		dst_hi = ~dst_hi;
+		dst_lo = -dst_lo;
+		if (dst_lo == 0) dst_hi++;
+	}
+	SET_VFLG (0);
+	SET_CFLG (0);
+	SET_ZFLG (dst_hi == 0 && dst_lo == 0);
+	SET_NFLG (((uae_s32)dst_hi) < 0);
+	if (extra & 0x400)
+	    m68k_dreg(regs, extra & 7) = dst_hi;
+	else if ((dst_hi != 0 || (dst_lo & 0x80000000) != 0)
+		 && ((dst_hi & 0xffffffff) != 0xffffffff
+		     || (dst_lo & 0x80000000) != 0x80000000))
+	{
+	    SET_VFLG (1);
+	}
+	m68k_dreg(regs, (extra >> 12) & 7) = dst_lo;
+    } else {
+	/* unsigned */
+	uae_u32 dst_lo,dst_hi;
+
+	mul_unsigned(src,(uae_u32)m68k_dreg(regs, (extra >> 12) & 7),&dst_hi,&dst_lo);
+
+	SET_VFLG (0);
+	SET_CFLG (0);
+	SET_ZFLG (dst_hi == 0 && dst_lo == 0);
+	SET_NFLG (((uae_s32)dst_hi) < 0);
+	if (extra & 0x400)
+	    m68k_dreg(regs, extra & 7) = dst_hi;
+	else if (dst_hi != 0) {
+	    SET_VFLG (1);
+	}
+	m68k_dreg(regs, (extra >> 12) & 7) = dst_lo;
+    }
+#endif
+}
+static char* ccnames[] =
+{ "T ","F ","HI","LS","CC","CS","NE","EQ",
+  "VC","VS","PL","MI","GE","LT","GT","LE" };
+
+/* Reset the CPU: clear TC bit 0x8000 (MMU off), then either re-sync from a restored savestate or cold-start from the longwords at 0x00f80000/4. */
+void m68k_reset (void)
+{
+	write_log("M68K: RESET!\n");
+	mmu_set_tc(regs.tc & ~0x8000); /* disable mmu */
+    regs.kick_mask = 0x00F80000;
+    regs.spcflags = 0;
+    if (savestate_state == STATE_RESTORE) {
+        m68k_setpc (regs.pc);
+	/* MakeFromSR() must not swap stack pointer */
+	regs.s = (regs.sr >> 13) & 1;
+	MakeFromSR();
+	/* set stack pointer */
+	if (regs.s)
+	    m68k_areg(regs, 7) = regs.isp;
+	else
+	    m68k_areg(regs, 7) = regs.usp;
+	return;
+    }
+    m68k_areg (regs, 7) = phys_get_long (0x00f80000);
+    m68k_setpc (phys_get_long (0x00f80004));
+    refill_prefetch (m68k_getpc (), 0);
+    fill_prefetch_0 ();
+    regs.s = 1;
+    regs.m = 0;
+    regs.stopped = 0;
+    regs.t1 = 0;
+    regs.t0 = 0;
+    SET_ZFLG (0);
+    SET_XFLG (0);
+    SET_CFLG (0);
+    SET_VFLG (0);
+    SET_NFLG (0);
+    regs.intmask = 7;
+    regs.vbr = regs.sfc = regs.dfc = 0;
+    regs.fpcr = regs.fpsr = regs.fpiar = 0;
+}
+
+/* Illegal-opcode handler: tries the emulator's own traps first, then raises exception 0xB, 0xA or 4; returns 4 on every path. */
+unsigned long REGPARAM2 op_illg (uae_u32 opcode)
+{
+    uaecptr pc = m68k_getpc ();
+    if (cloanto_rom && (opcode & 0xF100) == 0x7100) {
+	m68k_dreg (regs, (opcode >> 9) & 7) = (uae_s8)(opcode & 0xFF);
+	m68k_incpc (2);
+	fill_prefetch_0 ();
+	return 4;
+    }
+
+    compiler_flush_jsr_stack ();
+    if (opcode == 0x4E7B && phys_get_long (0x10) == 0 && (pc & 0xF80000) == 0xF80000) {
+	write_log ("Your Kickstart requires a 68020 CPU. Giving up.\n");
+	broken_in = 1;
+	set_special (SPCFLAG_BRK);
+	quit_program = 1;
+    }
+    if (opcode == 0xFF0D) {
+	if ((pc & 0xF80000) == 0xF80000) {
+	    /* This is from the dummy Kickstart replacement */
+	    uae_u16 arg = get_iword (2);
+	    m68k_incpc (4);
+	    ersatz_perform (arg);
+	    fill_prefetch_0 ();
+	    return 4;
+	} else if ((pc & 0xFFFF0000) == RTAREA_BASE) {
+	    /* User-mode STOP replacement */
+	    m68k_setstopped (1);
+	    return 4;
+	}
+    }
+
+    if ((opcode & 0xF000) == 0xA000 && (pc & 0xFFFF0000) == RTAREA_BASE) {
+	/* Calltrap. */
+	m68k_incpc(2);
+	call_calltrap (opcode & 0xFFF);
+	fill_prefetch_0 ();
+	return 4;
+    }
+
+    if ((opcode & 0xF000) == 0xF000) {
+	Exception(0xB,0);
+	return 4;
+    }
+    if ((opcode & 0xF000) == 0xA000) {
+	/* 0xAxxx opcodes fetched from the RTAREA_BASE page were already
+	 * dispatched as calltraps (and returned) above, so the former
+	 * second calltrap test here could never be true.  Whatever gets
+	 * this far is passed to the guest as exception 0xA. */
+	Exception(0xA,0);
+	return 4;
+    }
+#if 1
+    write_log ("Illegal instruction: %04x at %08lx\n", opcode, (unsigned long)pc);
+#endif
+    Exception (4,0);
+    return 4;
+}
+
+
+
+static int n_insns = 0, n_spcinsns = 0;
+
+static uaecptr last_trace_ad = 0;
+
+/* Decide whether a trace exception must be raised: T0 (cpu_level >= 2) only for change-of-flow insns, T1 for every insn. */
+static void do_trace (void)
+{
+    if (regs.t0 && currprefs.cpu_level >= 2) {
+	uae_u16 opcode;
+	/* should also include TRAP, CHK, SR modification FPcc;
+	   probably never used so why bother.  We can afford this to be inefficient... */
+	m68k_setpc (m68k_getpc ());
+	fill_prefetch_0 ();
+	opcode = get_word (regs.pc);
+	if (opcode == 0x4e73 		/* RTE (0x4e72 is STOP) */
+	    || opcode == 0x4e74 		/* RTD */
+	    || opcode == 0x4e75 		/* RTS */
+	    || opcode == 0x4e77 		/* RTR */
+	    || opcode == 0x4e76 		/* TRAPV */
+	    || (opcode & 0xffc0) == 0x4e80 	/* JSR */
+	    || (opcode & 0xffc0) == 0x4ec0 	/* JMP */
+	    || (opcode & 0xff00) == 0x6100  /* BSR */
+	    || ((opcode & 0xf000) == 0x6000	/* Bcc */
+		&& cctrue((opcode >> 8) & 0xf))
+	    || ((opcode & 0xf0f8) == 0x50c8 /* DBcc: 0101 cccc 1100 1rrr */
+		&& !cctrue((opcode >> 8) & 0xf)
+		&& (uae_s16)m68k_dreg(regs, opcode & 7) != 0))
+	{
+	    last_trace_ad = m68k_getpc ();
+	    unset_special (SPCFLAG_TRACE);
+	    set_special (SPCFLAG_DOTRACE);
+	}
+    } else if (regs.t1) {
+	last_trace_ad = m68k_getpc ();
+	unset_special (SPCFLAG_TRACE);
+	set_special (SPCFLAG_DOTRACE);
+    }
+}
+
+/* Service the pending regs.spcflags bits (copper, blitter wait, trace, STOP, interrupts); returns 1 on BRK or MODE_CHANGE so the run loop exits, else 0. */
+static int do_specialties (int cycles)
+{
+    if (regs.spcflags & SPCFLAG_COPPER)
+	do_copper ();
+    /*n_spcinsns++;*/
+    while ((regs.spcflags & SPCFLAG_BLTNASTY) && cycles > 0) {
+	int c = blitnasty();
+	if (!c) {
+	    cycles -= 2 * CYCLE_UNIT;
+	    if (cycles < CYCLE_UNIT)
+		cycles = 0;
+	    c = 1;
+	}
+	do_cycles (c * CYCLE_UNIT);
+	if (regs.spcflags & SPCFLAG_COPPER)
+	    do_copper ();
+    }
+
+    run_compiled_code();
+    if (regs.spcflags & SPCFLAG_DOTRACE) {
+	Exception (9,last_trace_ad);
+    }
+    while (regs.spcflags & SPCFLAG_STOP) {
+	do_cycles (4 * CYCLE_UNIT);
+	if (regs.spcflags & SPCFLAG_COPPER)
+	    do_copper ();
+	if (regs.spcflags & (SPCFLAG_INT | SPCFLAG_DOINT)){
+	    int intr = intlev ();
+	    unset_special (SPCFLAG_INT | SPCFLAG_DOINT);
+	    if (intr != -1 && intr > regs.intmask) {
+		Interrupt (intr);
+		regs.stopped = 0;
+		unset_special (SPCFLAG_STOP);
+	    }
+	}
+    }
+    if (regs.spcflags & SPCFLAG_TRACE)
+	do_trace ();
+
+    if (regs.spcflags & SPCFLAG_DOINT) {
+	int intr = intlev ();
+	unset_special (SPCFLAG_DOINT);
+	if (intr != -1 && intr > regs.intmask) {
+	    Interrupt (intr);
+	    regs.stopped = 0;
+	}
+    }
+    if (regs.spcflags & SPCFLAG_INT) {
+	unset_special (SPCFLAG_INT);
+	set_special (SPCFLAG_DOINT);
+    }
+    if (regs.spcflags & (SPCFLAG_BRK | SPCFLAG_MODE_CHANGE)) {
+	unset_special (SPCFLAG_BRK | SPCFLAG_MODE_CHANGE);
+	return 1;
+    }
+    return 0;
+}
+
+/* It's really sad to have two almost identical functions for this, but we
+   do it all for performance... :( */
+static void m68k_run_1 (void)
+{
+#ifdef DEBUG_PREFETCH
+    uae_u8 saved_bytes[20];
+    uae_u16 *oldpcp;
+#endif
+    for (;;) {
+	int cycles;
+	uae_u32 opcode = get_iword_prefetch (0);
+#ifdef DEBUG_PREFETCH
+	if (get_ilong (0) != do_get_mem_long (&regs.prefetch)) {
+	    write_log ("Prefetch differs from memory.\n");
+	    debugging = 1;
+	    return;
+	}
+	oldpcp = regs.pc_p;
+	memcpy (saved_bytes, regs.pc_p, 20);
+#endif
+	/* assert (!regs.stopped && !(regs.spcflags & SPCFLAG_STOP)); */
+/*	regs_backup[backup_pointer = (backup_pointer + 1) % 16] = regs;*/
+#if COUNT_INSTRS == 2
+	if (table68k[opcode].handler != -1)
+	    instrcount[table68k[opcode].handler]++;
+#elif COUNT_INSTRS == 1
+	instrcount[opcode]++;
+#endif
+#if defined X86_ASSEMBLY
+	__asm__ __volatile__("\tcall *%%ebx"
+			     : "=&a" (cycles) : "b" (cpufunctbl[opcode]), "0" (opcode)
+			     : "%edx", "%ecx",
+			     "%esi", "%edi", "%ebp", "memory", "cc");
+#else
+	cycles = (*cpufunctbl[opcode])(opcode);
+#endif
+#ifdef DEBUG_PREFETCH
+	if (memcmp (saved_bytes, oldpcp, 20) != 0) {
+	    write_log ("Self-modifying code detected.\n");
+	    set_special (SPCFLAG_BRK);
+	    debugging = 1;
+	}
+#endif
+	/*n_insns++;*/
+	cycles &= cycles_mask;
+	cycles |= cycles_val;
+	do_cycles (cycles);
+	if (regs.spcflags) {
+	    if (do_specialties (cycles))
+		return;
+	}
+    }
+}
+
+#define DEBUG_PREFETCH
+
+/* Same thing, but don't use prefetch to get opcode.  */
+static void m68k_run_2 (void)
+{
+    for (;;) {
+	int cycles;
+	uae_u32 opcode = get_iword (0);
+
+	/* assert (!regs.stopped && !(regs.spcflags & SPCFLAG_STOP)); */
+/*	regs_backup[backup_pointer = (backup_pointer + 1) % 16] = regs;*/
+#if COUNT_INSTRS == 2
+	if (table68k[opcode].handler != -1)
+	    instrcount[table68k[opcode].handler]++;
+#elif COUNT_INSTRS == 1
+	instrcount[opcode]++;
+#endif
+#if defined X86_ASSEMBLY
+	__asm__ __volatile__("\tcall *%%ebx"
+			     : "=&a" (cycles) : "b" (cpufunctbl[opcode]), "0" (opcode)
+			     : "%edx", "%ecx",
+			     "%esi", "%edi", "%ebp", "memory", "cc");
+#else
+	cycles = (*cpufunctbl[opcode])(opcode);
+#endif
+
+	/*n_insns++;*/
+	cycles &= cycles_mask;
+	cycles |= cycles_val;
+        do_cycles (cycles);
+	if (regs.spcflags) {
+	    if (do_specialties (cycles))
+		return;
+	}
+    }
+}
+
+#ifdef X86_ASSEMBLY
+STATIC_INLINE void m68k_run1 (void (*func)(void))
+{
+    /* Work around compiler bug: GCC doesn't push %ebp in m68k_run_1. */
+    __asm__ __volatile__ ("pushl %%ebp\n\tcall *%0\n\tpopl %%ebp"
+			  : : "r" (func) : "%eax", "%edx", "%ecx", "memory", "cc");
+}
+#else
+#define m68k_run1(F) (F) ()
+#endif
+
+int in_m68k_go = 0;
+
+/* Top-level CPU loop: runs the emulated 68k until quit_program == 1; any other positive quit_program value triggers a reset and continues. */
+void m68k_go (int may_quit)
+{
+    if (in_m68k_go || !may_quit) {
+	write_log ("Bug! m68k_go is not reentrant.\n");
+	abort ();
+    }
+
+    reset_frame_rate_hack ();
+    update_68k_cycles ();
+    in_m68k_go++;
+    /* Set the jump target only after the count is bumped, so a longjmp to m68k_exception does not increment it again. */
+#if MMU_SETJMP_EXCEPTIONS
+	setjmp(m68k_exception);
+#endif
+    for (;;) {
+	if (quit_program > 0) {
+	    if (quit_program == 1)
+		break;
+	    quit_program = 0;
+	    if (savestate_state == STATE_RESTORE) {
+		restore_state (savestate_filename);
+#if 0
+		activate_debugger ();
+#endif
+	    }
+	    m68k_reset ();
+	    reset_all_systems ();
+	    customreset ();
+	    /* We may have been restoring state, but we're done now.  */
+	    savestate_restore_finish ();
+	    handle_active_events ();
+	    if (regs.spcflags)
+		do_specialties (0);
+	}
+
+	if (debugging)
+	    debug ();
+	m68k_run1 (currprefs.cpu_compatible ? m68k_run_1 : m68k_run_2);
+    }
+    in_m68k_go--;
+}
+
+/* Run verify_ea on the source and destination operands of the opcode at the current PC; raises exception 3 on the first failure. addr and nextpc are unused. */
+static void m68k_verify (uaecptr addr, uaecptr *nextpc)
+{
+    uae_u32 opcode, val;
+    struct instr *dp;
+    opcode = get_iword_1(0);
+    last_op_for_exception_3 = opcode;
+    m68kpc_offset = 2;
+
+    if (cpufunctbl[opcode] == op_illg_1) {
+	opcode = 0x4AFC;
+    }
+    dp = table68k + opcode;
+
+    if (dp->suse) {
+	if (!verify_ea (dp->sreg, dp->smode, dp->size, &val)) {
+	    Exception (3, 0);
+	    return;
+	}
+    }
+    if (dp->duse) {
+	if (!verify_ea (dp->dreg, dp->dmode, dp->size, &val)) {
+	    Exception (3, 0);
+	    return;
+	}
+    }
+}
+
+/* Disassemble cnt instructions starting at addr to f; if nextpc is non-NULL it receives the address following the last one. */
+void m68k_disasm (FILE *f, uaecptr addr, uaecptr *nextpc, int cnt)
+{
+    uaecptr newpc = 0;
+    m68kpc_offset = addr - m68k_getpc ();
+    while (cnt-- > 0) {
+	char instrname[20],*ccpt;
+	int opwords;
+	uae_u32 opcode;
+	struct mnemolookup *lookup;
+	struct instr *dp;
+
+	/* uaecptr values are cast for %lx, as the prefetch print in m68k_dumpstate already does. */
+	fprintf (f, "%08lx: ", (unsigned long)(m68k_getpc () + m68kpc_offset));
+	for (opwords = 0; opwords < 5; opwords++){
+	    fprintf (f, "%04x ", get_iword_1 (m68kpc_offset + opwords*2));
+	}
+	opcode = get_iword_1 (m68kpc_offset);
+	m68kpc_offset += 2;
+	if (cpufunctbl[opcode] == op_illg_1)
+	    opcode = 0x4AFC;
+	dp = table68k + opcode;
+	for (lookup = lookuptab;lookup->mnemo != dp->mnemo; lookup++)
+	    ;
+
+	strcpy (instrname, lookup->name);
+	ccpt = strstr (instrname, "cc");
+	if (ccpt != 0) {
+	    strncpy (ccpt, ccnames[dp->cc], 2);
+	}
+	fprintf (f, "%s", instrname);
+	switch (dp->size){
+	 case sz_byte: fprintf (f, ".B "); break;
+	 case sz_word: fprintf (f, ".W "); break;
+	 case sz_long: fprintf (f, ".L "); break;
+	 default: fprintf (f, "   "); break;
+	}
+	if (dp->suse) {
+	    newpc = m68k_getpc () + m68kpc_offset;
+	    newpc += ShowEA (f, dp->sreg, dp->smode, dp->size, 0);
+	}
+	if (dp->suse && dp->duse)
+	    fprintf (f, ",");
+	if (dp->duse) {
+	    newpc = m68k_getpc () + m68kpc_offset;
+	    newpc += ShowEA (f, dp->dreg, dp->dmode, dp->size, 0);
+	}
+	if (ccpt != 0) {
+	    if (cctrue(dp->cc))
+		fprintf (f, " == %08lx (TRUE)", (unsigned long)newpc);
+	    else
+		fprintf (f, " == %08lx (FALSE)", (unsigned long)newpc);
+	} else if ((opcode & 0xff00) == 0x6100) /* BSR */
+	    fprintf (f, " == %08lx", (unsigned long)newpc);
+	fprintf (f, "\n");
+    }
+    if (nextpc)
+	*nextpc = m68k_getpc () + m68kpc_offset;
+}
+
+/* Print the CPU registers and a one-instruction disassembly at the current PC to f; nextpc, if non-NULL, receives the address after that instruction. */
+void m68k_dumpstate (FILE *f, uaecptr *nextpc)
+{
+    int i;
+    for (i = 0; i < 8; i++){
+	fprintf (f, "D%d: %08lx ", i, (unsigned long)m68k_dreg(regs, i));
+	if ((i & 3) == 3) fprintf (f, "\n");
+    }
+    for (i = 0; i < 8; i++){
+	fprintf (f, "A%d: %08lx ", i, (unsigned long)m68k_areg(regs, i));
+	if ((i & 3) == 3) fprintf (f, "\n");
+    }
+    if (regs.s == 0) regs.usp = m68k_areg(regs, 7);	/* copy A7 into whichever saved SP is currently active */
+    if (regs.s && regs.m) regs.msp = m68k_areg(regs, 7);
+    if (regs.s && regs.m == 0) regs.isp = m68k_areg(regs, 7);
+    fprintf (f, "USP=%08lx ISP=%08lx MSP=%08lx VBR=%08lx\n",
+	     (unsigned long)regs.usp, (unsigned long)regs.isp, (unsigned long)regs.msp, (unsigned long)regs.vbr);
+    fprintf (f, "T=%d%d S=%d M=%d X=%d N=%d Z=%d V=%d C=%d IMASK=%d\n",
+	     regs.t1, regs.t0, regs.s, regs.m,
+	     GET_XFLG, GET_NFLG, GET_ZFLG, GET_VFLG, GET_CFLG, regs.intmask);
+    for (i = 0; i < 8; i++){
+	fprintf (f, "FP%d: %g ", i, regs.fp[i]);
+	if ((i & 3) == 3) fprintf (f, "\n");
+    }
+    fprintf (f, "N=%d Z=%d I=%d NAN=%d\n",
+	     (regs.fpsr & 0x8000000) != 0,
+	     (regs.fpsr & 0x4000000) != 0,
+	     (regs.fpsr & 0x2000000) != 0,
+	     (regs.fpsr & 0x1000000) != 0);
+    if (currprefs.cpu_compatible)
+	fprintf (f, "prefetch %08lx\n", (unsigned long)do_get_mem_long(&regs.prefetch));
+    fprintf (f, "this PC: %08lx\n", (unsigned long)m68k_getpc());
+    m68k_disasm (f, m68k_getpc (), nextpc, 1);
+    if (nextpc)
+	fprintf (f, "next PC: %08lx\n", (unsigned long)*nextpc);
+}
+
+
+/* {{{ CPU save/restore code */
+
+#define CPUTYPE_EC 1
+#define CPUMODE_HALT 1
+
+/* Load CPU state via restore_u32/restore_u16 (presumably macros that advance src - confirm): model, flags, D0-A6, PC, prefetch, USP, ISP, SR, halt flag, then model-dependent registers. Returns src. */
+uae_u8 *restore_cpu (uae_u8 *src)
+{
+    int i,model,flags;
+    uae_u32 l;
+
+    model = restore_u32();
+    switch (model) {
+    case 68000:
+	currprefs.cpu_level = 0;
+	break;
+    case 68010:
+	currprefs.cpu_level = 1;
+	break;
+    case 68020:
+	currprefs.cpu_level = 2;
+	break;
+    default:
+	write_log ("Unknown cpu type %d\n", model);
+	break;
+    }
+
+    flags = restore_u32();
+    currprefs.address_space_24 = 0;
+    if (flags & CPUTYPE_EC)
+	currprefs.address_space_24 = 1;
+    for (i = 0; i < 15; i++)
+	regs.regs[i] = restore_u32 ();
+    regs.pc = restore_u32 ();
+    /* We don't actually use the saved prefetch word - prefetch_pc is
+       deliberately set away from pc (pc + 128) so that prefetch isn't
+       used for the first insn after a state restore.  */
+    regs.prefetch = restore_u32 ();
+    regs.prefetch_pc = regs.pc + 128;
+    regs.usp = restore_u32 ();
+    regs.isp = restore_u32 ();
+    regs.sr = restore_u16 ();
+    l = restore_u32();
+    if (l & CPUMODE_HALT) {
+	regs.stopped = 1;
+	set_special (SPCFLAG_STOP);
+    } else
+	regs.stopped = 0;
+    if (model >= 68010) {
+	regs.dfc = restore_u32 ();
+	regs.sfc = restore_u32 ();
+	regs.vbr = restore_u32 ();
+    }
+    if (model >= 68020) {
+	regs.caar = restore_u32 ();
+	regs.cacr = restore_u32 ();
+	regs.msp = restore_u32 ();
+    }
+    write_log ("CPU %d%s%03d, PC=%08.8X\n",
+	       model/1000, flags & 1 ? "EC" : "", model % 1000, regs.pc);
+    return src;
+}
+
+static int cpumodel[] = { 68000, 68010, 68020, 68020 };
+
+uae_u8 *save_cpu (int *len)
+{
+    uae_u8 *dstbak,*dst;
+    int model,i;
+
+    dstbak = dst = malloc(4+4+15*4+4+4+4+4+2+4+4+4+4+4+4+4);
+    model = cpumodel[currprefs.cpu_level];
+    save_u32 (model);					/* MODEL */
+    save_u32 (currprefs.address_space_24 ? 1 : 0);	/* FLAGS */
+    for(i = 0;i < 15; i++) save_u32 (regs.regs[i]);	/* D0-D7 A0-A6 */
+    save_u32 (m68k_getpc ());				/* PC */
+    save_u32 (regs.prefetch);				/* prefetch */
+    MakeSR ();
+    save_u32 (!regs.s ? regs.regs[15] : regs.usp);	/* USP */
+    save_u32 (regs.s ? regs.regs[15] : regs.isp);	/* ISP */
+    save_u16 (regs.sr);				/* SR/CCR */
+    save_u32 (regs.stopped ? CPUMODE_HALT : 0);	/* flags */
+    if(model >= 68010) {
+	save_u32 (regs.dfc);				/* DFC */
+	save_u32 (regs.sfc);				/* SFC */
+	save_u32 (regs.vbr);				/* VBR */
+    }
+    if(model >= 68020) {
+	save_u32 (regs.caar);				/* CAAR */
+	save_u32 (regs.cacr);				/* CACR */
+	save_u32 (regs.msp);				/* MSP */
+    }
+    *len = dst - dstbak;
+    return dstbak;
+}
+
+/* }}} */
+
diff -urN src-0.8.22/src/picasso96.c src-0.8.22-mmu/src/picasso96.c
--- src-0.8.22/src/picasso96.c	2002-02-27 14:08:18.000000000 +0100
+++ src-0.8.22-mmu/src/picasso96.c	2003-07-25 12:36:00.000000000 +0200
@@ -131,7 +131,7 @@
 {
     int i;
     uaecptr amigamodeinfoptr;
-    struct LibResolution *uaememptr = (struct LibResolution *) get_mem_bank (amigalibresptr).xlateaddr (amigalibresptr);
+    struct LibResolution *uaememptr = (struct LibResolution *)get_real_address(amigalibresptr);
 
     return;
 
@@ -333,7 +333,7 @@
     char *uaememptr = 0;
     int i;
 
-    uaememptr = gfxmem_xlate (amigamemptr);	/* I know that amigamemptr is inside my gfxmem chunk, so I can just do the xlate() */
+    uaememptr = get_real_address(amigamemptr);
     memset (uaememptr, 0, PSSO_LibResolution_sizeof);	/* zero out our LibResolution structure */
     strcpy (uaememptr + PSSO_LibResolution_P96ID, libres->P96ID);
     put_long (amigamemptr + PSSO_LibResolution_DisplayID, libres->DisplayID);
@@ -767,7 +767,7 @@
 	res->Modes[TRUEALPHA] = amigamemptr;
 	break;
     }
-    uaememptr = gfxmem_xlate (amigamemptr);	/* I know that amigamemptr is inside my gfxmem chunk, so I can just do the xlate() */
+    uaememptr = get_real_address(amigamemptr);
     memset (uaememptr, 0, PSSO_ModeInfo_sizeof);	/* zero out our ModeInfo struct */
 
     put_word (amigamemptr + PSSO_ModeInfo_Width, dm->res.width);
@@ -839,6 +839,11 @@
     int ModeInfoStructureCount = 1, LibResolutionStructureCount = 0;
     uaecptr amigamemptr = 0;
     uaecptr AmigaBoardInfo = m68k_areg (regs, 2);
+
+    if (regs.mmu_enabled)	{
+        mmu_make_transparent_region(gfxmem_start, allocated_gfxmem, 1);
+    }
+	
     put_word (AmigaBoardInfo + PSSO_BoardInfo_BitsPerCannon, DX_BitsPerCannon ());
     put_word (AmigaBoardInfo + PSSO_BoardInfo_RGBFormats, picasso96_pixel_format);
     put_word (AmigaBoardInfo + PSSO_BoardInfo_SoftSpriteFlags, picasso96_pixel_format);
diff -urN src-0.8.22/src/picasso96.c~ src-0.8.22-mmu/src/picasso96.c~
--- src-0.8.22/src/picasso96.c~	1970-01-01 01:00:00.000000000 +0100
+++ src-0.8.22-mmu/src/picasso96.c~	2003-07-25 12:11:11.000000000 +0200
@@ -0,0 +1,2509 @@
+/*
+ * UAE - The U*nix Amiga Emulator
+ *
+ * Picasso96 Support Module
+ *
+ * Copyright 1997 Brian King <Brian_King@Mitel.com, Brian_King@Cloanto.com>
+ *
+ * Theory of operation:
+ * On the Amiga side, a Picasso card consists mainly of a memory area that
+ * contains the frame buffer.  On the UAE side, we allocate a block of memory
+ * that will hold the frame buffer.  This block is in normal memory, it is
+ * never directly on the graphics card.  All graphics operations, which are
+ * mainly reads and writes into this block and a few basic operations like
+ * filling a rectangle, operate on this block of memory.
+ * Since the memory is not on the graphics card, some work must be done to
+ * synchronize the display with the data in the Picasso frame buffer.  There
+ * are various ways to do this.  One possibility is to allocate a second
+ * buffer of the same size, and perform all write operations twice.  Since
+ * we never read from the second buffer, it can actually be placed in video
+ * memory.  The X11 driver could be made to use the Picasso frame buffer as
+ * the data buffer of an XImage, which could then be XPutImage()d from time
+ * to time.  Another possibility is to translate all Picasso accesses into
+ * Xlib (or GDI, or whatever your graphics system is) calls.  This possibility
+ * is a bit tricky, since there is a risk of generating very many single pixel
+ * accesses which may be rather slow.
+ *
+ * TODO:
+ * - add panning capability
+ * - we want to add a manual switch to override SetSwitch for hardware banging
+ *   programs started from a Picasso workbench.
+ */
+
+#include "sysconfig.h"
+#include "sysdeps.h"
+
+#include "config.h"
+#include "options.h"
+#include "threaddep/thread.h"
+#include "uae.h"
+#include "memory.h"
+#include "custom.h"
+#include "newcpu.h"
+#include "xwin.h"
+#include "picasso96.h"
+
+#ifdef PICASSO96
+
+#define P96TRACING_ENABLED 0
+#if P96TRACING_ENABLED
+#define P96TRACE(x)	do { write_log x; } while(0)
+#else
+#define P96TRACE(x)
+#endif
+
+static uae_u32 gfxmem_lget (uaecptr) REGPARAM;
+static uae_u32 gfxmem_wget (uaecptr) REGPARAM;
+static uae_u32 gfxmem_bget (uaecptr) REGPARAM;
+static void gfxmem_lput (uaecptr, uae_u32) REGPARAM;
+static void gfxmem_wput (uaecptr, uae_u32) REGPARAM;
+static void gfxmem_bput (uaecptr, uae_u32) REGPARAM;
+static int gfxmem_check (uaecptr addr, uae_u32 size) REGPARAM;
+static uae_u8 *gfxmem_xlate (uaecptr addr) REGPARAM;
+
+static void write_gfx_long (uaecptr addr, uae_u32 value);
+static void write_gfx_word (uaecptr addr, uae_u16 value);
+static void write_gfx_byte (uaecptr addr, uae_u8 value);
+
+static uae_u8 all_ones_bitmap, all_zeros_bitmap;
+
+struct picasso96_state_struct picasso96_state;
+struct picasso_vidbuf_description picasso_vidinfo;
+
+/* These are the maximum resolutions... They are filled in by GetSupportedResolutions() */
+/* have to fill this in, otherwise problems occur
+ * @@@ ??? what problems?
+ */
+struct ScreenResolution planar = { 320, 240 };
+struct ScreenResolution chunky = { 640, 480 };
+struct ScreenResolution hicolour = { 640, 480 };
+struct ScreenResolution truecolour = { 640, 480 };
+struct ScreenResolution alphacolour = { 640, 480 };
+
+uae_u16 picasso96_pixel_format = RGBFF_CHUNKY;
+
+struct PicassoResolution DisplayModes[MAX_PICASSO_MODES];
+
+static int mode_count = 0;
+
+static int set_gc_called = 0;
+static int set_panning_called = 0;
+/* Address of the screen in the Amiga frame buffer at the time of the last
+   SetPanning call.  */
+static uaecptr oldscr;
+
+static uae_u32 p2ctab[256][2];
+
+/*
+ * Debugging dumps
+ */
+
+static void DumpModeInfoStructure (uaecptr amigamodeinfoptr)
+{
+    write_log ("ModeInfo Structure Dump:\n");
+    write_log ("  Node.ln_Succ  = 0x%x\n", get_long (amigamodeinfoptr));
+    write_log ("  Node.ln_Pred  = 0x%x\n", get_long (amigamodeinfoptr + 4));
+    write_log ("  Node.ln_Type  = 0x%x\n", get_byte (amigamodeinfoptr + 8));
+    write_log ("  Node.ln_Pri   = %d\n", get_byte (amigamodeinfoptr + 9));
+    /*write_log ("  Node.ln_Name  = %s\n", uaememptr->Node.ln_Name); */
+    write_log ("  OpenCount     = %d\n", get_word (amigamodeinfoptr + PSSO_ModeInfo_OpenCount));
+    write_log ("  Active        = %d\n", get_byte (amigamodeinfoptr + PSSO_ModeInfo_Active));
+    write_log ("  Width         = %d\n", get_word (amigamodeinfoptr + PSSO_ModeInfo_Width));
+    write_log ("  Height        = %d\n", get_word (amigamodeinfoptr + PSSO_ModeInfo_Height));
+    write_log ("  Depth         = %d\n", get_byte (amigamodeinfoptr + PSSO_ModeInfo_Depth));
+    write_log ("  Flags         = %d\n", get_byte (amigamodeinfoptr + PSSO_ModeInfo_Flags));
+    write_log ("  HorTotal      = %d\n", get_word (amigamodeinfoptr + PSSO_ModeInfo_HorTotal));
+    write_log ("  HorBlankSize  = %d\n", get_word (amigamodeinfoptr + PSSO_ModeInfo_HorBlankSize));
+    write_log ("  HorSyncStart  = %d\n", get_word (amigamodeinfoptr + PSSO_ModeInfo_HorSyncStart));
+    write_log ("  HorSyncSize   = %d\n", get_word (amigamodeinfoptr + PSSO_ModeInfo_HorSyncSize));
+    write_log ("  HorSyncSkew   = %d\n", get_byte (amigamodeinfoptr + PSSO_ModeInfo_HorSyncSkew));
+    write_log ("  HorEnableSkew = %d\n", get_byte (amigamodeinfoptr + PSSO_ModeInfo_HorEnableSkew));
+    write_log ("  VerTotal      = %d\n", get_word (amigamodeinfoptr + PSSO_ModeInfo_VerTotal));
+    write_log ("  VerBlankSize  = %d\n", get_word (amigamodeinfoptr + PSSO_ModeInfo_VerBlankSize));
+    write_log ("  VerSyncStart  = %d\n", get_word (amigamodeinfoptr + PSSO_ModeInfo_VerSyncStart));
+    write_log ("  VerSyncSize   = %d\n", get_word (amigamodeinfoptr + PSSO_ModeInfo_VerSyncSize));
+    write_log ("  Clock         = %d\n", get_byte (amigamodeinfoptr + PSSO_ModeInfo_first_union));
+    write_log ("  ClockDivide   = %d\n", get_byte (amigamodeinfoptr + PSSO_ModeInfo_second_union));
+    write_log ("  PixelClock    = %d\n", get_long (amigamodeinfoptr + PSSO_ModeInfo_PixelClock));
+}
+
+static void DumpLibResolutionStructure (uaecptr amigalibresptr)
+{
+    int i;
+    uaecptr amigamodeinfoptr;
+	struct LibResolution *uaememptr = (struct LibResolution *)get_real_address(amigalibresptr);
+
+    return;
+
+    write_log ("LibResolution Structure Dump:\n");
+
+    if (get_long (amigalibresptr + PSSO_LibResolution_DisplayID) == 0xFFFFFFFF) {
+	write_log ("  Finished With LibResolutions...\n");
+    } else {
+	write_log ("  Name      = %s\n", uaememptr->P96ID);
+	write_log ("  DisplayID = 0x%x\n", get_long (amigalibresptr + PSSO_LibResolution_DisplayID));
+	write_log ("  Width     = %d\n", get_word (amigalibresptr + PSSO_LibResolution_Width));
+	write_log ("  Height    = %d\n", get_word (amigalibresptr + PSSO_LibResolution_Height));
+	write_log ("  Flags     = %d\n", get_word (amigalibresptr + PSSO_LibResolution_Flags));
+	for (i = 0; i < MAXMODES; i++) {
+	    amigamodeinfoptr = get_long (amigalibresptr + PSSO_LibResolution_Modes + i * 4);
+	    write_log ("  ModeInfo[%d] = 0x%x\n", i, amigamodeinfoptr);
+	    if (amigamodeinfoptr)
+		DumpModeInfoStructure (amigamodeinfoptr);
+	}
+	write_log ("  BoardInfo = 0x%x\n", get_long (amigalibresptr + PSSO_LibResolution_BoardInfo));
+    }
+}
+
+static char binary_byte[9];
+
+static char *BuildBinaryString (uae_u8 value)
+{
+    int i;
+    for (i = 0; i < 8; i++) {
+	binary_byte[i] = (value & (1 << (7 - i))) ? '#' : '.';
+    }
+    binary_byte[8] = '\0';
+    return binary_byte;
+}
+
+static void DumpPattern (struct Pattern *patt)
+{
+    uae_u8 *mem;
+    int row, col;
+    for (row = 0; row < (1 << patt->Size); row++) {
+	mem = patt->Memory + row * 2;
+	for (col = 0; col < 2; col++) {
+	    write_log ("%s", BuildBinaryString (*mem++));
+	}
+	write_log ("\n");
+    }
+}
+
+static void DumpTemplate (struct Template *tmp, uae_u16 w, uae_u16 h)
+{
+    uae_u8 *mem = tmp->Memory;
+    int row, col, width;
+    width = (w + 7) >> 3;
+    write_log ("xoffset = %d, bpr = %d\n", tmp->XOffset, tmp->BytesPerRow);
+    for (row = 0; row < h; row++) {
+	mem = tmp->Memory + row * tmp->BytesPerRow;
+	for (col = 0; col < width; col++) {
+	    write_log ("%s", BuildBinaryString (*mem++));
+	}
+	write_log ("\n");
+    }
+}
+
+int picasso_nr_resolutions (void)
+{
+    return mode_count;
+}
+
+static void ShowSupportedResolutions (void)
+{
+    int i;
+
+    return;
+
+    for (i = 0; i < mode_count; i++)
+	write_log ("%s\n", DisplayModes[i].name);
+}
+
+static uae_u8 GetBytesPerPixel (uae_u32 RGBfmt)
+{
+    switch (RGBfmt) {
+    case RGBFB_CLUT:
+	return 1;
+
+    case RGBFB_A8R8G8B8:
+    case RGBFB_A8B8G8R8:
+    case RGBFB_R8G8B8A8:
+    case RGBFB_B8G8R8A8:
+	return 4;
+
+    case RGBFB_B8G8R8:
+    case RGBFB_R8G8B8:
+	return 3;
+
+    case RGBFB_R5G5B5:
+    case RGBFB_R5G6B5:
+    case RGBFB_R5G6B5PC:
+    case RGBFB_R5G5B5PC:
+    case RGBFB_B5G6R5PC:
+    case RGBFB_B5G5R5PC:
+	return 2;
+    default:
+	write_log ("ERROR - GetBytesPerPixel() was unsuccessful with 0x%x?!\n", RGBfmt);
+	return 0;
+    }
+}
+
+/*
+ * Amiga <-> native structure conversion functions
+ */
+
+static int CopyRenderInfoStructureA2U (uaecptr amigamemptr, struct RenderInfo *ri)
+{
+    uaecptr memp = get_long (amigamemptr + PSSO_RenderInfo_Memory);
+
+    if (valid_address (memp, PSSO_RenderInfo_sizeof)) {
+	ri->Memory = get_real_address (memp);
+	ri->BytesPerRow = get_word (amigamemptr + PSSO_RenderInfo_BytesPerRow);
+	ri->RGBFormat = get_long (amigamemptr + PSSO_RenderInfo_RGBFormat);
+	return 1;
+    }
+    write_log ("ERROR - Invalid RenderInfo memory area...\n");
+    return 0;
+}
+
+static int CopyPatternStructureA2U (uaecptr amigamemptr, struct Pattern *pattern)
+{
+    uaecptr memp = get_long (amigamemptr + PSSO_Pattern_Memory);
+    if (valid_address (memp, PSSO_Pattern_sizeof)) {
+	pattern->Memory = get_real_address (memp);
+	pattern->XOffset = get_word (amigamemptr + PSSO_Pattern_XOffset);
+	pattern->YOffset = get_word (amigamemptr + PSSO_Pattern_YOffset);
+	pattern->FgPen = get_long (amigamemptr + PSSO_Pattern_FgPen);
+	pattern->BgPen = get_long (amigamemptr + PSSO_Pattern_BgPen);
+	pattern->Size = get_byte (amigamemptr + PSSO_Pattern_Size);
+	pattern->DrawMode = get_byte (amigamemptr + PSSO_Pattern_DrawMode);
+	return 1;
+    }
+    write_log ("ERROR - Invalid Pattern memory area...\n");
+    return 0;
+}
+
+static void CopyColorIndexMappingA2U (uaecptr amigamemptr, struct ColorIndexMapping *cim)
+{
+    int i;
+    cim->ColorMask = get_long (amigamemptr);
+    for (i = 0; i < 256; i++, amigamemptr += 4)
+	cim->Colors[i] = get_long (amigamemptr + 4);
+}
+
+static int CopyBitMapStructureA2U (uaecptr amigamemptr, struct BitMap *bm)
+{
+    int i;
+
+    bm->BytesPerRow = get_word (amigamemptr + PSSO_BitMap_BytesPerRow);
+    bm->Rows = get_word (amigamemptr + PSSO_BitMap_Rows);
+    bm->Flags = get_byte (amigamemptr + PSSO_BitMap_Flags);
+    bm->Depth = get_byte (amigamemptr + PSSO_BitMap_Depth);
+
+    for (i = 0; i < bm->Depth; i++) {
+	uaecptr plane = get_long (amigamemptr + PSSO_BitMap_Planes + i * 4);
+	switch (plane) {
+	case 0:
+	    bm->Planes[i] = &all_zeros_bitmap;
+	    break;
+	case 0xFFFFFFFF:
+	    bm->Planes[i] = &all_ones_bitmap;
+	    break;
+	default:
+	    if (valid_address (plane, bm->BytesPerRow * bm->Rows))
+		bm->Planes[i] = get_real_address (plane);
+	    else
+		return 0;
+	    break;
+	}
+    }
+    return 1;
+}
+
+static int CopyTemplateStructureA2U (uaecptr amigamemptr, struct Template *tmpl)
+{
+    uaecptr memp = get_long (amigamemptr + PSSO_Template_Memory);
+
+    if (valid_address (memp, 1 /* FIXME */ )) {
+	tmpl->Memory = get_real_address (memp);
+	tmpl->BytesPerRow = get_word (amigamemptr + PSSO_Template_BytesPerRow);
+	tmpl->XOffset = get_byte (amigamemptr + PSSO_Template_XOffset);
+	tmpl->DrawMode = get_byte (amigamemptr + PSSO_Template_DrawMode);
+	tmpl->FgPen = get_long (amigamemptr + PSSO_Template_FgPen);
+	tmpl->BgPen = get_long (amigamemptr + PSSO_Template_BgPen);
+	return 1;
+    }
+    write_log ("ERROR - Invalid Template memory area...\n");
+    return 0;
+}
+
+static void CopyLibResolutionStructureU2A (struct LibResolution *libres, uaecptr amigamemptr)
+{
+    char *uaememptr = 0;
+    int i;
+
+    uaememptr = get_real_address(amigamemptr);
+    memset (uaememptr, 0, PSSO_LibResolution_sizeof);	/* zero out our LibResolution structure */
+    strcpy (uaememptr + PSSO_LibResolution_P96ID, libres->P96ID);
+    put_long (amigamemptr + PSSO_LibResolution_DisplayID, libres->DisplayID);
+    put_word (amigamemptr + PSSO_LibResolution_Width, libres->Width);
+    put_word (amigamemptr + PSSO_LibResolution_Height, libres->Height);
+    put_word (amigamemptr + PSSO_LibResolution_Flags, libres->Flags);
+    for (i = 0; i < MAXMODES; i++)
+	put_long (amigamemptr + PSSO_LibResolution_Modes + i * 4, libres->Modes[i]);
+#if 0
+    put_long (amigamemptr, libres->Node.ln_Succ);
+    put_long (amigamemptr + 4, libres->Node.ln_Pred);
+    put_byte (amigamemptr + 8, libres->Node.ln_Type);
+    put_byte (amigamemptr + 9, libres->Node.ln_Pri);
+#endif
+    put_long (amigamemptr + 10, amigamemptr + PSSO_LibResolution_P96ID);
+    put_long (amigamemptr + PSSO_LibResolution_BoardInfo, libres->BoardInfo);
+}
+
+/* list is Amiga address of list, in correct endian format for UAE
+ * node is Amiga address of node, in correct endian format for UAE */
+static void AmigaListAddTail (uaecptr list, uaecptr node)
+{
+    uaecptr amigamemptr = 0;
+
+    if (get_long (list + 8) == list) {
+	/* Empty list - set it up */
+	put_long (list, node);	/* point the lh_Head to our new node */
+	put_long (list + 4, 0);	/* set the lh_Tail to NULL */
+	put_long (list + 8, node);	/* point the lh_TailPred to our new node */
+
+	/* Adjust the new node - don't rely on it being zeroed out */
+	put_long (node, 0);	/* ln_Succ */
+	put_long (node + 4, 0);	/* ln_Pred */
+    } else {
+	amigamemptr = get_long (list + 8);	/* get the lh_TailPred contents */
+
+	put_long (list + 8, node);	/* point the lh_TailPred to our new node */
+
+	/* Adjust the previous lh_TailPred node */
+	put_long (amigamemptr, node);	/* point the ln_Succ to our new node */
+
+	/* Adjust the new node - don't rely on it being zeroed out */
+	put_long (node, 0);	/* ln_Succ */
+	put_long (node + 4, amigamemptr);	/* ln_Pred */
+    }
+}
+
+/*
+ * Functions to perform an action on the real screen
+ */
+
+/*
+ * Fill a rectangle on the screen.  src points to the start of a line of the
+ * filled rectangle in the frame buffer; it can be used as a memcpy source if
+ * there is no OS specific function to fill the rectangle.
+ */
+
+static void do_fillrect (uae_u8 * src, int x, int y, int width, int height,
+			 uae_u32 pen, int Bpp, RGBFTYPE rgbtype)
+{
+    uae_u8 *dst;	/* write position inside the locked host frame buffer */
+
+    /* Clipping: shift by the panning offset, then trim to picasso96_state.Width x Height.  */
+    x -= picasso96_state.XOffset;
+    y -= picasso96_state.YOffset;
+    if (x < 0) {
+	width += x;
+	x = 0;
+    }
+    if (y < 0) {
+	height += y;
+	y = 0;
+    }
+    if (x + width > picasso96_state.Width)
+	width = picasso96_state.Width - x;
+    if (y + height > picasso96_state.Height)
+	height = picasso96_state.Height - y;
+
+    if (width <= 0 || height <= 0)
+	return;
+
+    /* Try OS specific fillrect function here; and return if successful.  */
+
+    DX_Invalidate (y, y + height - 1);	/* presumably marks these host rows as changed - confirm */
+    if (!picasso_vidinfo.extra_mem)
+	return;
+
+    width *= picasso96_state.BytesPerPixel;	/* width counts bytes from here on */
+    dst = gfx_lock_picasso ();
+    if (!dst)
+	goto out;	/* NOTE(review): gfx_unlock_picasso() still runs although the lock failed - confirm */
+
+    dst += y * picasso_vidinfo.rowbytes + x * picasso_vidinfo.pixbytes;
+    if (picasso_vidinfo.rgbformat == picasso96_state.RGBFormat) {
+	if (Bpp == 1) {
+	    while (height-- > 0) {
+		memset (dst, pen, width);
+		dst += picasso_vidinfo.rowbytes;
+	    }
+	} else {
+	    while (height-- > 0) {
+		memcpy (dst, src, width);	/* every row reuses the same already filled source line */
+		dst += picasso_vidinfo.rowbytes;
+	    }
+	}
+    } else {
+	int psiz = GetBytesPerPixel (picasso_vidinfo.rgbformat);
+	if (picasso96_state.RGBFormat != RGBFB_CHUNKY)
+	    abort ();	/* formats differ: only a chunky frame buffer is converted, through clut[] */
+
+	while (height-- > 0) {
+	    int i;
+	    switch (psiz) {
+	    case 2:
+		for (i = 0; i < width; i++)
+		    *((uae_u16 *) dst + i) = picasso_vidinfo.clut[src[i]];
+		break;
+	    case 4:
+		for (i = 0; i < width; i++)
+		    *((uae_u32 *) dst + i) = picasso_vidinfo.clut[src[i]];
+		break;
+	    default:
+		abort ();
+	    }
+	    dst += picasso_vidinfo.rowbytes;
+	}
+    }
+  out:
+    gfx_unlock_picasso ();
+}
+
+/*
+ * Copy a rectangle of the P96 frame buffer described by ri (host pointer
+ * ri->Memory, ri->BytesPerRow bytes per line) to the host screen, clipping
+ * it against the visible area first.  If can_do_blit is nonzero an OS blit
+ * inside video memory could be used instead, but none is implemented here,
+ * so the data is always copied by hand.  opcode is currently not used.
+ */
+
+static void do_blit (struct RenderInfo *ri, int Bpp, int srcx, int srcy,
+		     int dstx, int dsty, int width, int height,
+		     BLIT_OPCODE opcode, int can_do_blit)
+{
+    int xoff = picasso96_state.XOffset;
+    int yoff = picasso96_state.YOffset;
+    uae_u8 *srcp, *dstp;
+
+    /* Clipping: destination becomes relative to the panning offset.  */
+    dstx -= xoff;
+    dsty -= yoff;
+    if (srcy < yoff || srcx < xoff
+	|| srcx - xoff + width > picasso96_state.Width
+	|| srcy - yoff + height > picasso96_state.Height)
+    {
+	can_do_blit = 0;
+    }
+    if (dstx < 0) {
+	srcx -= dstx;
+	width += dstx;
+	dstx = 0;
+    }
+    if (dsty < 0) {
+	srcy -= dsty;
+	height += dsty;
+	dsty = 0;
+    }
+    if (dstx + width > picasso96_state.Width)
+	width = picasso96_state.Width - dstx;
+    if (dsty + height > picasso96_state.Height)
+	height = picasso96_state.Height - dsty;
+    if (width <= 0 || height <= 0)
+	return;
+
+    /* If this RenderInfo points at something else than the currently visible
+     * screen, we must ignore the blit.  */
+    if (can_do_blit) {
+	/*
+	 * Call OS blitting function that can do it in video memory.
+	 * Should return if it was successful
+	 */
+    }
+
+    /* If no OS blit available, we do a copy from the P96 framebuffer in Amiga
+       memory to the host's frame buffer.  */
+    DX_Invalidate (dsty, dsty + height - 1);
+    if (!picasso_vidinfo.extra_mem)
+	return;
+
+    dstp = gfx_lock_picasso ();
+    if (dstp == 0)
+	goto out;
+    dstp += dsty * picasso_vidinfo.rowbytes + dstx * picasso_vidinfo.pixbytes;
+    srcp = ri->Memory + srcx * Bpp + srcy * ri->BytesPerRow;	/* must be set before the trace below reads it */
+    P96TRACE(("do_blit with srcp 0x%x, dstp 0x%x, dst_rowbytes %d, srcx %d, srcy %d, dstx %d, dsty %d, w %d, h %d, dst_pixbytes %d\n",
+        srcp, dstp, picasso_vidinfo.rowbytes, srcx, srcy, dstx, dsty, width, height, picasso_vidinfo.pixbytes));
+    P96TRACE(("gfxmem is at 0x%x\n",gfxmemory));
+
+    if (picasso_vidinfo.rgbformat == picasso96_state.RGBFormat) {
+	/* Same pixel format on both sides: copy the raw bytes row by row */
+	width *= Bpp;
+	while (height-- > 0) {
+	    memcpy (dstp, srcp, width);
+	    srcp += ri->BytesPerRow;
+	    dstp += picasso_vidinfo.rowbytes;
+	}
+    } else {
+	int psiz = GetBytesPerPixel (picasso_vidinfo.rgbformat);
+	if (picasso96_state.RGBFormat != RGBFB_CHUNKY)
+	    abort ();	/* only a chunky frame buffer is converted, through clut[] */
+
+	while (height-- > 0) {
+	    int i;
+	    switch (psiz) {
+	    case 2:
+		for (i = 0; i < width; i++)
+		    *((uae_u16 *) dstp + i) = picasso_vidinfo.clut[srcp[i]];
+		break;
+	    case 4:
+		for (i = 0; i < width; i++)
+		    *((uae_u32 *) dstp + i) = picasso_vidinfo.clut[srcp[i]];
+		break;
+	    default:
+		abort ();
+	    }
+	    srcp += ri->BytesPerRow;
+	    dstp += picasso_vidinfo.rowbytes;
+	}
+    }
+  out:
+    gfx_unlock_picasso ();
+}
+
+/*
+ * Bring a rectangle of the frame buffer described by ri onto the screen via do_blit().
+ */
+
+static void do_invertrect (struct RenderInfo *ri, int Bpp, int x, int y, int width, int height)
+{
+#if 0	/* disabled: do_blit() below does its own clipping */
+    /* Clipping.  */
+    x -= picasso96_state.XOffset;
+    y -= picasso96_state.YOffset;
+    if (x < 0) {
+	width += x;
+	x = 0;
+    }
+    if (y < 0) {
+	height += y;
+	y = 0;
+    }
+    if (x + width > picasso96_state.Width)
+	width = picasso96_state.Width - x;
+    if (y + height > picasso96_state.Height)
+	height = picasso96_state.Height - y;
+
+    if (width <= 0 || height <= 0)
+	return;
+
+#endif
+    /* TODO: Try OS specific invertrect function here; and return if successful.  */
+
+    do_blit (ri, Bpp, x, y, x, y, width, height, BLIT_SRC, 0);	/* same source and destination position, no OS blit */
+}
+
+static uaecptr wgfx_linestart;	/* 0xFFFFFFFF while no line is waiting to be flushed */
+static uaecptr wgfx_lineend;
+static uaecptr wgfx_min, wgfx_max;	/* start and end of the span to flush, as offsets into gfxmemory */
+static long wgfx_y;	/* frame buffer row of the pending line */
+
+static void wgfx_do_flushline (void)
+{
+    /* Copy the pending span of row wgfx_y to the host screen, then mark it flushed.  */
+    long x0, width;
+    uae_u8 *src, *dstp;
+    int Bpp = GetBytesPerPixel (picasso_vidinfo.rgbformat);
+    int fb_bpp = picasso96_state.BytesPerPixel;
+
+    wgfx_y -= picasso96_state.YOffset;
+    if (wgfx_y < 0 || wgfx_y >= picasso96_state.Height)
+	goto out1;
+
+    DX_Invalidate (wgfx_y, wgfx_y);
+    if (!picasso_vidinfo.extra_mem)
+	goto out1;
+
+    x0 = wgfx_min - wgfx_linestart;
+    width = wgfx_max - wgfx_min;
+    x0 -= picasso96_state.XOffset * fb_bpp;
+    if (x0 < 0) {
+	width += x0;
+	wgfx_min -= x0;	/* x0 is negative: advance the source past the bytes left of the visible area */
+	x0 = 0;
+    }
+    if (x0 + width > picasso96_state.Width * fb_bpp)
+	width = picasso96_state.Width * fb_bpp - x0;
+    if (width <= 0)	/* span lies completely outside the visible area */
+	goto out1;
+
+    dstp = gfx_lock_picasso ();
+    if (dstp == 0)
+	goto out;
+    P96TRACE(("flushing %d\n", wgfx_y));
+    src = gfxmemory + wgfx_min;
+    if (picasso_vidinfo.rgbformat == picasso96_state.RGBFormat) {
+	dstp += wgfx_y * picasso_vidinfo.rowbytes + x0;
+	memcpy (dstp, src, width);
+    } else {
+	int i;
+
+	if (picasso96_state.RGBFormat != RGBFB_CHUNKY)
+	    abort ();	/* only a chunky frame buffer is converted, through clut[] */
+
+	dstp += wgfx_y * picasso_vidinfo.rowbytes + x0 * Bpp;
+	switch (Bpp) {
+	case 2:
+	    for (i = 0; i < width; i++)
+		*((uae_u16 *) dstp + i) = picasso_vidinfo.clut[src[i]];
+	    break;
+	case 4:
+	    for (i = 0; i < width; i++)
+		*((uae_u32 *) dstp + i) = picasso_vidinfo.clut[src[i]];
+	    break;
+	default:
+	    abort ();
+	}
+    }
+
+  out:
+    gfx_unlock_picasso ();
+  out1:
+    wgfx_linestart = 0xFFFFFFFF;
+}
+
+STATIC_INLINE void wgfx_flushline (void)
+{
+    /* Flush only when a line is pending and the Picasso screen is on */
+    if (wgfx_linestart != 0xFFFFFFFF && picasso_on)
+	wgfx_do_flushline ();
+}
+
+static int renderinfo_is_current_screen (struct RenderInfo *ri)
+{
+    /* Nonzero only while the Picasso screen is switched on and ri->Memory is
+     * the host pointer of the displayed frame buffer, i.e. the Amiga address
+     * picasso96_state.Address rebased from gfxmem_start onto gfxmemory.
+     * ri is not looked at while the Picasso screen is off.  */
+    return picasso_on
+	&& ri->Memory == gfxmemory + (picasso96_state.Address - gfxmem_start);
+}
+
+/* Clear our screen, since we've got a new Picasso screen-mode, and refresh with the proper contents
+ * This is called on several occasions:
+ * 1. Amiga-->Picasso transition, via SetSwitch()
+ * 2. Picasso-->Picasso transition, via SetPanning().
+ * 3. whenever the graphics code notifies us that the screen contents have been lost.
+ */
+void picasso_refresh (void)
+{
+    struct RenderInfo screen;
+    unsigned int blit_w, blit_h;
+
+    if (!picasso_on)
+	return;
+
+    /* Make sure that the first time we show a Picasso video mode, we don't blit any crap:
+     * without an Address yet there is nothing to show, so only report it.  */
+    if (!picasso96_state.Address) {
+	write_log ("ERROR - picasso_refresh() can't refresh!\n");
+	return;
+    }
+
+    /* Describe our static frame-buffer so that do_blit() can copy it
+     * to the gfx-card */
+    screen.Memory = gfxmemory + (picasso96_state.Address - gfxmem_start);
+    screen.BytesPerRow = picasso96_state.BytesPerRow;
+    screen.RGBFormat = picasso96_state.RGBFormat;
+
+    /* After SetPanning() the virtual size is used, before that the mode size */
+    blit_w = set_panning_called ? picasso96_state.VirtualWidth : picasso96_state.Width;
+    blit_h = set_panning_called ? picasso96_state.VirtualHeight : picasso96_state.Height;
+
+    do_blit (&screen, picasso96_state.BytesPerPixel, 0, 0, 0, 0,
+	     blit_w, blit_h, BLIT_SRC, 0);
+}
+
+/*
+ * BOOL FindCard(struct BoardInfo *bi);
+ *
+ * FindCard is called in the first stage of the board initialisation and
+ * configuration and is used to look if there is a free and unconfigured
+ * board of the type the driver is capable of managing. If it finds one,
+ * it immediately reserves it for use by Picasso96, usually by clearing
+ * the CDB_CONFIGME bit in the flags field of the ConfigDev struct of
+ * this expansion card. But this is only a common example, a driver can
+ * do whatever it wants to mark this card as used by the driver. This
+ * mechanism is intended to ensure that a board is only configured and
+ * used by one driver. FindCard also usually fills some fields of the
+ * BoardInfo struct supplied by the caller, the rtg.library, for example
+ * the MemoryBase, MemorySize and RegisterBase fields.
+ */
+uae_u32 picasso_FindCard (void)
+{
+    uaecptr board = m68k_areg (regs, 0);	/* see BoardInfo struct definition in Picasso96 dev info */
+
+    /* No RTG memory configured, or our "card" was already claimed: nothing to find */
+    if (!allocated_gfxmem || picasso96_state.CardFound)
+	return 0;
+
+    /* Fill in MemoryBase and MemorySize; the size is that of the memory minus a
+     * 32K chunk: 16K for pattern bitmaps, 16K for resolution list */
+    put_long (board + PSSO_BoardInfo_MemoryBase, gfxmem_start);
+    put_long (board + PSSO_BoardInfo_MemorySize, allocated_gfxmem - 32768);
+
+    picasso96_state.CardFound = 1;	/* mark our "card" as being found */
+    return -1;
+}
+
+static void FillBoardInfo (uaecptr amigamemptr, struct LibResolution *res, struct PicassoResolution *dm)
+{
+    /* Store the ModeInfo address in the slot matching the mode's depth (1, 2, 3
+     * select CHUNKY, HICOLOR, TRUECOLOR; anything else TRUEALPHA), then fill it. */
+    int slot;
+    char *host_modeinfo;
+
+    if (dm->depth == 1)
+	slot = CHUNKY;
+    else if (dm->depth == 2)
+	slot = HICOLOR;
+    else if (dm->depth == 3)
+	slot = TRUECOLOR;
+    else
+	slot = TRUEALPHA;
+    res->Modes[slot] = amigamemptr;
+
+    host_modeinfo = get_real_address(amigamemptr);
+    memset (host_modeinfo, 0, PSSO_ModeInfo_sizeof);	/* zero out our ModeInfo struct */
+
+    put_word (amigamemptr + PSSO_ModeInfo_Width, dm->res.width);
+    put_word (amigamemptr + PSSO_ModeInfo_Height, dm->res.height);
+    put_byte (amigamemptr + PSSO_ModeInfo_Depth, dm->depth * 8);
+    put_byte (amigamemptr + PSSO_ModeInfo_Flags, 0);
+    put_word (amigamemptr + PSSO_ModeInfo_HorTotal, dm->res.width);
+    put_word (amigamemptr + PSSO_ModeInfo_HorBlankSize, 0);
+    put_word (amigamemptr + PSSO_ModeInfo_HorSyncStart, 0);
+    put_word (amigamemptr + PSSO_ModeInfo_HorSyncSize, 0);
+    put_byte (amigamemptr + PSSO_ModeInfo_HorSyncSkew, 0);
+    put_byte (amigamemptr + PSSO_ModeInfo_HorEnableSkew, 0);
+
+    put_word (amigamemptr + PSSO_ModeInfo_VerTotal, dm->res.height);
+    put_word (amigamemptr + PSSO_ModeInfo_VerBlankSize, 0);
+    put_word (amigamemptr + PSSO_ModeInfo_VerSyncStart, 0);
+    put_word (amigamemptr + PSSO_ModeInfo_VerSyncSize, 0);
+
+    put_byte (amigamemptr + PSSO_ModeInfo_first_union, 98);
+    put_byte (amigamemptr + PSSO_ModeInfo_second_union, 14);
+
+    put_long (amigamemptr + PSSO_ModeInfo_PixelClock, dm->res.width * dm->res.height * dm->refresh);
+}
+
+static uae_u32 AssignModeID (int i, int count)
+{
+    /* Standard resolutions own the fixed IDs 0x50001000 + n * 0x10000, n being
+     * the position in this table.  Any other resolution gets an ID above that
+     * range, derived from the caller supplied count.  */
+    static const int standard[9][2] = {
+	{ 320, 200 }, { 320, 240 },
+	{ 640, 400 }, { 640, 480 },
+	{ 800, 600 }, { 1024, 768 },
+	{ 1152, 864 }, { 1280, 1024 },
+	{ 1600, 1280 }
+    };
+    int n;
+
+    for (n = 0; n < 9; n++) {
+	if (DisplayModes[i].res.width == standard[n][0]
+	    && DisplayModes[i].res.height == standard[n][1])
+	    return 0x50001000 + n * 0x10000;
+    }
+
+    /* Not one of the standard resolutions */
+    return 0x50091000 + count * 0x10000;
+}
+
+/****************************************
+* InitCard()
+*
+* a2: BoardInfo structure ptr - Amiga-based address in Intel endian-format
+*
+* Job - fill in the following structure members:
+* gbi_RGBFormats: the pixel formats that the host-OS of UAE supports
+*     If UAE is running in a window, it should ONLY report the pixel format of the host-OS desktop
+*     If UAE is running full-screen, it should report ALL pixel formats that the host-OS can handle in full-screen
+*     NOTE: If full-screen, and the user toggles to windowed-mode, all hell will break loose visually.  Must inform
+*           user that they're doing something stupid (unless their desktop and full-screen colour modes match).
+* gbi_SoftSpriteFlags: should be the same as above for now, until actual cursor support is added
+* gbi_BitsPerCannon: could be 6 or 8 or ???, depending on the host-OS gfx-card
+* gbi_MaxHorResolution: fill this in for all modes (even if you don't support them)
+* gbi_MaxVerResolution: fill this in for all modes (even if you don't support them)
+*/
+uae_u32 picasso_InitCard (void)
+{
+    struct LibResolution res;
+    int i;
+    int ModeInfoStructureCount = 1, LibResolutionStructureCount = 0;
+    uaecptr amigamemptr = 0;
+    uaecptr AmigaBoardInfo = m68k_areg (regs, 2);
+
+    if (regs.mmu_enabled)	/* NOTE(review): presumably exempts the RTG memory from MMU translation - confirm */
+	mmu_make_transparent_region(gfxmem_start, allocated_gfxmem, 1);
+
+    put_word (AmigaBoardInfo + PSSO_BoardInfo_BitsPerCannon, DX_BitsPerCannon ());
+    put_word (AmigaBoardInfo + PSSO_BoardInfo_RGBFormats, picasso96_pixel_format);
+    put_word (AmigaBoardInfo + PSSO_BoardInfo_SoftSpriteFlags, picasso96_pixel_format);
+    put_long (AmigaBoardInfo + PSSO_BoardInfo_BoardType, BT_uaegfx);
+    put_word (AmigaBoardInfo + PSSO_BoardInfo_MaxHorResolution + 0, planar.width);
+    put_word (AmigaBoardInfo + PSSO_BoardInfo_MaxHorResolution + 2, chunky.width);
+    put_word (AmigaBoardInfo + PSSO_BoardInfo_MaxHorResolution + 4, hicolour.width);
+    put_word (AmigaBoardInfo + PSSO_BoardInfo_MaxHorResolution + 6, truecolour.width);
+    put_word (AmigaBoardInfo + PSSO_BoardInfo_MaxHorResolution + 8, alphacolour.width);
+    put_word (AmigaBoardInfo + PSSO_BoardInfo_MaxVerResolution + 0, planar.height);
+    put_word (AmigaBoardInfo + PSSO_BoardInfo_MaxVerResolution + 2, chunky.height);
+    put_word (AmigaBoardInfo + PSSO_BoardInfo_MaxVerResolution + 4, hicolour.height);
+    put_word (AmigaBoardInfo + PSSO_BoardInfo_MaxVerResolution + 6, truecolour.height);
+    put_word (AmigaBoardInfo + PSSO_BoardInfo_MaxVerResolution + 8, alphacolour.height);
+
+    for (i = 0; i < mode_count;) {
+	int j = i;
+	const char *comma = strchr (DisplayModes[i].name, ',');	/* NULL when the mode name has no comma */
+	/* Add a LibResolution structure to the ResolutionsList MinList in our BoardInfo */
+	res.DisplayID = AssignModeID (i, LibResolutionStructureCount);
+	res.BoardInfo = AmigaBoardInfo;
+	res.Width = DisplayModes[i].res.width;
+	res.Height = DisplayModes[i].res.height;
+	res.Flags = P96F_PUBLIC;
+	res.P96ID[0] = 'P';
+	res.P96ID[1] = '9';
+	res.P96ID[2] = '6';
+	res.P96ID[3] = '-';
+	res.P96ID[4] = '0';
+	res.P96ID[5] = ':';
+	strcpy (res.Name, "uaegfx:");	/* NOTE(review): nothing here bounds the final name to the size of res.Name - confirm */
+	strncat (res.Name, DisplayModes[i].name, comma ? (size_t) (comma - DisplayModes[i].name) : strlen (DisplayModes[i].name));
+	res.Modes[PLANAR] = 0;
+	res.Modes[CHUNKY] = 0;
+	res.Modes[HICOLOR] = 0;
+	res.Modes[TRUECOLOR] = 0;
+	res.Modes[TRUEALPHA] = 0;
+
+	do {
+	    /* Handle this display mode's depth */
+	    /* Only add the modes when there is enough P96 RTG memory to hold the bitmap */
+	    long required = DisplayModes[i].res.width * DisplayModes[i].res.height * DisplayModes[i].depth;
+	    if (allocated_gfxmem - 32768 > required) {
+		amigamemptr = gfxmem_start + allocated_gfxmem - (PSSO_ModeInfo_sizeof * ModeInfoStructureCount++);
+		FillBoardInfo (amigamemptr, &res, &DisplayModes[i]);
+	    }
+	    i++;
+	} while (i < mode_count
+		 && DisplayModes[i].res.width == DisplayModes[j].res.width
+		 && DisplayModes[i].res.height == DisplayModes[j].res.height);
+
+	amigamemptr = gfxmem_start + allocated_gfxmem - 16384 + (PSSO_LibResolution_sizeof * LibResolutionStructureCount++);
+	CopyLibResolutionStructureU2A (&res, amigamemptr);
+	DumpLibResolutionStructure (amigamemptr);
+	AmigaListAddTail (AmigaBoardInfo + PSSO_BoardInfo_ResolutionsList, amigamemptr);
+    }
+
+    return 0;
+}
+
+extern int x_size, y_size;
+
+/*
+ * SetSwitch:
+ * a0:	struct BoardInfo
+ * d0.w:	BOOL state
+ * this function should set a board switch to let the Amiga signal pass
+ * through when supplied with a 0 in d0 and to show the board signal if
+ * a 1 is passed in d0. You should remember the current state of the
+ * switch to avoid unneeded switching. If your board has no switch, then
+ * simply supply a function that does nothing except a RTS.
+ *
+ * NOTE: Return the opposite of the switch-state. BDK
+*/
+uae_u32 picasso_SetSwitch (void)
+{
+    uae_u16 state = m68k_dreg (regs, 0) & 0xFFFF;
+
+    /* The switch is not performed here.  Only the desired state is recorded
+     * for the custom chip emulation; custom.c calls picasso_enablescreen
+     * whenever it is ready to change the screen state.  */
+    picasso_requested_on = (state != 0);
+#if 0
+    write_log ("SetSwitch() - trying to show %s screen\n", state ? "picasso96" : "amiga");
+#endif
+    /* D0 receives the opposite of the requested switch-state */
+    return state == 0;
+}
+
+void picasso_enablescreen (int on)	/* on is only referenced by the disabled log call below */
+{
+    wgfx_linestart = 0xFFFFFFFF;	/* forget any line that was waiting to be flushed */
+    picasso_refresh ();	/* repaint from the frame buffer; returns at once unless picasso_on is set */
+#if 0
+    write_log ("SetSwitch() - showing %s screen\n", on ? "picasso96" : "amiga");
+#endif
+}
+
+static int first_color_changed = 256;
+static int last_color_changed = -1;
+
+void picasso_handle_vsync (void)
+{
+    int pending = last_color_changed - first_color_changed;	/* > 0 when palette entries changed */
+    if (pending > 0) {
+	DX_SetPalette (first_color_changed, pending);
+	/* If we're emulating a CLUT mode, we need to redraw the entire screen.  */
+	if (picasso_vidinfo.rgbformat != picasso96_state.RGBFormat)
+	    picasso_refresh ();
+    }
+    first_color_changed = 256;	/* back to "nothing changed" */
+    last_color_changed = -1;
+}
+
+void picasso_clip_mouse (int *px, int *py)
+{
+    int xoff = picasso96_state.XOffset, yoff = picasso96_state.YOffset;
+    int x = *px, y;
+
+    /* Clamp each coordinate so that coordinate + panning offset stays within
+     * 0 .. picasso_vidinfo.width (resp. height); lower bound first.  */
+    x = (x < -xoff) ? -xoff : x;
+    *px = (x + xoff > picasso_vidinfo.width) ? picasso_vidinfo.width - xoff : x;
+    y = *py;
+    y = (y < -yoff) ? -yoff : y;
+    *py = (y + yoff > picasso_vidinfo.height) ? picasso_vidinfo.height - yoff : y;
+}
+
+/*
+ * SetColorArray:
+ * a0: struct BoardInfo
+ * d0.w: startindex
+ * d1.w: count
+ * when this function is called, your driver has to fetch "count" color
+ * values starting at "startindex" from the CLUT field of the BoardInfo
+ * structure and write them to the hardware. The color values are always
+ * between 0 and 255 for each component regardless of the number of bits
+ * per cannon your board has. So you might have to shift the colors
+ * before writing them to the hardware.
+ */
+uae_u32 picasso_SetColorArray (void)
+{
+    /* Copy count colours starting at index start (d0.w / d1.w) from the BoardInfo CLUT into
+     * picasso96_state.CLUT and remember the changed range for picasso_handle_vsync(); returns 1 */
+    uae_u16 start = m68k_dreg (regs, 0);
+    uae_u16 count = m68k_dreg (regs, 1);
+    int i;
+    int end = start + count;	/* one past the last entry to update */
+    uaecptr boardinfo = m68k_areg (regs, 0);
+    uaecptr clut = boardinfo + PSSO_BoardInfo_CLUT + start * 3;
+    int changed = 0;
+
+    if (end > 256)	/* never index past the palette; NOTE(review): assumes CLUT[] has 256 entries - confirm */
+	end = 256;
+    for (i = start; i < end; i++) {
+	int r = get_byte (clut);
+	int g = get_byte (clut + 1);
+	int b = get_byte (clut + 2);
+	changed |= (picasso96_state.CLUT[i].Red != r || picasso96_state.CLUT[i].Green != g || picasso96_state.CLUT[i].Blue != b);
+	picasso96_state.CLUT[i].Red = r;
+	picasso96_state.CLUT[i].Green = g;
+	picasso96_state.CLUT[i].Blue = b;
+	clut += 3;
+    }
+    if (changed) {
+	if (start < first_color_changed)
+	    first_color_changed = start;
+	if (end > last_color_changed)
+	    last_color_changed = end;
+    }
+    return 1;
+}
+
+/*
+ * SetDAC:
+ * a0: struct BoardInfo
+ * d7: RGBFTYPE RGBFormat
+ * This function is called whenever the RGB format of the display changes,
+ * e.g. from chunky to TrueColor. Usually, all you have to do is to set
+ * the RAMDAC of your board accordingly.
+ */
+uae_u32 picasso_SetDAC (void)
+{
+    /* Meant to keep track of the pixel format the Amiga side selects for our frame-buffer;
+     * currently nothing is recorded: the call is only logged and 1 is returned */
+
+    write_log ("SetDAC()\n");
+    return 1;
+}
+
+static void init_picasso_screen (void)
+{
+    int cols = picasso96_state.Width;
+    int rows = picasso96_state.Height;
+    int virt_rows = picasso96_state.VirtualHeight;
+
+    /* Nothing can be set up until SetGC() has been called at least once */
+    if (!set_gc_called)
+	return;
+
+    /* Once SetPanning() has run, the end of the bitmap is the start address
+     * plus BytesPerRow times the virtual height */
+    if (set_panning_called)
+	picasso96_state.Extent = picasso96_state.Address + (picasso96_state.BytesPerRow * virt_rows);
+
+    /* Pass the mode on to the graphics layer and set all 256 palette
+     * entries */
+    gfx_set_picasso_modeinfo (cols, rows, picasso96_state.GC_Depth, picasso96_state.RGBFormat);
+    DX_SetPalette (0, 256);
+
+    /* Forget any pending line flush and repaint from the frame buffer */
+    wgfx_linestart = 0xFFFFFFFF;
+    picasso_refresh ();
+}
+
+/*
+ * SetGC:
+ * a0: struct BoardInfo
+ * a1: struct ModeInfo
+ * d0: BOOL Border
+ * This function is called whenever another ModeInfo has to be set. This
+ * function simply sets up the CRTC and TS registers to generate the
+ * timing used for that screen mode. You should not set the DAC, clocks
+ * or linear start adress. They will be set when appropriate by their
+ * own functions.
+ */
+uae_u32 picasso_SetGC (void)
+{
+    /* Copy the interesting fields of the ModeInfo passed in a1 into picasso96_state */
+    const uaecptr mode = m68k_areg (regs, 1);
+
+    /* The virtual size starts out equal to the visible size,
+     * in case SetPanning doesn't get called */
+    picasso96_state.VirtualWidth = picasso96_state.Width = get_word (mode + PSSO_ModeInfo_Width);
+    picasso96_state.VirtualHeight = picasso96_state.Height = get_word (mode + PSSO_ModeInfo_Height);
+
+    picasso96_state.GC_Depth = get_byte (mode + PSSO_ModeInfo_Depth);
+    picasso96_state.GC_Flags = get_byte (mode + PSSO_ModeInfo_Flags);
+
+    P96TRACE (("SetGC(%d,%d,%d)\n", picasso96_state.Width, picasso96_state.Height, picasso96_state.GC_Depth));
+
+    /* @@@ when do we need to reset this? */
+    set_gc_called = 1;
+    init_picasso_screen ();
+    return 1;
+}
+
+/*
+ * SetPanning:
+ * a0: struct BoardInfo
+ * a1: UBYTE *Memory
+ * d0: uae_u16 Width
+ * d1: WORD XOffset
+ * d2: WORD YOffset
+ * d7: RGBFTYPE RGBFormat
+ * This function sets the view origin of a display which might also be
+ * overscanned. In register a1 you get the start address of the screen
+ * bitmap on the Amiga side. You will have to subtract the starting
+ * address of the board memory from that value to get the memory start
+ * offset within the board. Then you get the offset in pixels of the
+ * left upper edge of the visible part of an overscanned display. From
+ * these values you will have to calculate the LinearStartingAddress
+ * fields of the CRTC registers.
+
+ * NOTE: SetPanning() can be used to know when a Picasso96 screen is
+ * being opened.  Better to do the appropriate clearing of the
+ * background here than in SetSwitch() derived functions,
+ * because SetSwitch() is not called for subsequent Picasso screens.
+ */
+uae_u32 picasso_SetPanning (void)
+{
+    uae_u16 Width = m68k_dreg (regs, 0);	/* d0: width, used below to derive BytesPerRow */
+    uaecptr start_of_screen = m68k_areg (regs, 1);	/* a1: Amiga address of the screen bitmap */
+    uaecptr bi = m68k_areg (regs, 0);
+    uaecptr bmeptr = get_long (bi + PSSO_BoardInfo_BitMapExtra);        /* Get our BoardInfo ptr's BitMapExtra ptr */
+    int oldxoff = picasso96_state.XOffset;	/* previous panning offset, needed for the mouse fix-up at the end */
+    int oldyoff = picasso96_state.YOffset;
+#if 0
+    /* @@@ This is in WinUAE, but it breaks things.  */
+    if (oldscr == 0) {
+	oldscr = start_of_screen;
+    }
+    if ((oldscr != start_of_screen)) {
+	set_gc_called = 0;
+	oldscr = start_of_screen;
+    }
+#endif
+
+    picasso96_state.Address = start_of_screen;	/* Amiga-side address */
+    picasso96_state.XOffset = (uae_s16) m68k_dreg (regs, 1);	/* d1/d2 are signed 16-bit offsets */
+    picasso96_state.YOffset = (uae_s16) m68k_dreg (regs, 2);
+    picasso96_state.VirtualWidth = get_word (bmeptr + PSSO_BitMapExtra_Width);
+    picasso96_state.VirtualHeight = get_word (bmeptr + PSSO_BitMapExtra_Height);
+    picasso96_state.RGBFormat = m68k_dreg (regs, 7);
+    picasso96_state.BytesPerPixel = GetBytesPerPixel (picasso96_state.RGBFormat);
+    picasso96_state.BytesPerRow = Width * picasso96_state.BytesPerPixel;
+
+    set_panning_called = 1;	/* from now on the virtual size and the panning offsets are in use */
+    P96TRACE (("SetPanning(%d, %d, %d) Start 0x%x, BPR %d\n",
+	       Width, picasso96_state.XOffset, picasso96_state.YOffset, start_of_screen, picasso96_state.BytesPerRow));
+
+    init_picasso_screen ();
+
+    lastmx += oldxoff - picasso96_state.XOffset;	/* presumably the stored mouse position, moved by the offset change - confirm */
+    lastmy += oldyoff - picasso96_state.YOffset;
+    
+    return 1;
+}
+
+static void do_xor8 (uae_u8 * ptr, long len, uae_u32 val)
+{	/* XOR val into each of the len bytes starting at ptr */
+    int i;
+#if 0 && defined ALIGN_POINTER_TO32	/* disabled variant that XORs 32 bits at a time once ptr is aligned */
+    int align_adjust = ALIGN_POINTER_TO32 (ptr);
+    int len2;
+
+    len -= align_adjust;
+    while (align_adjust) {
+	*ptr ^= val;
+	ptr++;
+	align_adjust--;
+    }
+    len2 = len >> 2;
+    len -= len2 << 2;
+    for (i = 0; i < len2; i++, ptr += 4) {
+	*(uae_u32 *) ptr ^= val;
+    }
+    while (len) {
+	*ptr ^= val;
+	ptr++;
+	len--;
+    }
+    return;
+#endif
+    for (i = 0; i < len; i++, ptr++) {
+	do_put_mem_byte (ptr, do_get_mem_byte (ptr) ^ val);	/* presumably only the low 8 bits of val are stored - confirm */
+    }
+}
+
+/*
+ * InvertRect:
+ * 
+ * Inputs:
+ * a0:struct BoardInfo *bi
+ * a1:struct RenderInfo *ri
+ * d0.w:X
+ * d1.w:Y
+ * d2.w:Width
+ * d3.w:Height
+ * d4.l:Mask
+ * d7.l:RGBFormat
+ * 
+ * This function is used to invert a rectangular area on the board. It is called by BltBitMap,
+ * BltPattern and BltTemplate.
+ */
+uae_u32 picasso_InvertRect (void)
+{
+    uaecptr renderinfo = m68k_areg (regs, 1);
+    unsigned long X = (uae_u16) m68k_dreg (regs, 0);	/* only the low words of d0-d3 are used */
+    unsigned long Y = (uae_u16) m68k_dreg (regs, 1);
+    unsigned long Width = (uae_u16) m68k_dreg (regs, 2);
+    unsigned long Height = (uae_u16) m68k_dreg (regs, 3);
+    uae_u32 mask = m68k_dreg (regs, 4);
+    int Bpp = GetBytesPerPixel (m68k_dreg (regs, 7));
+    uae_u32 xorval;
+    unsigned int lines;
+    struct RenderInfo ri;
+    uae_u8 *uae_mem;
+    unsigned long width_in_bytes;
+
+    wgfx_flushline ();
+
+    if (!CopyRenderInfoStructureA2U (renderinfo, &ri))
+	return 0;
+
+    P96TRACE (("InvertRect: X %d Y %d Width %d Height %d\n", X, Y, Width, Height));
+    /*write_log ("InvertRect %d %lx\n", Bpp, (long)mask); */
+
+    /* ??? Brian? mask used to be 32 bit, but it appears that only 8 bit
+     * values are passed to this function.  This code here seems to work
+     * much better... */
+    if (mask != 0xFF && Bpp > 1) {
+	write_log ("InvertRect: not obeying mask 0x%x properly with Bpp %d.\n", mask, Bpp);
+	mask = 0xFF;	/* partial masks are only applied when Bpp is 1 */
+    }
+    if ((mask & ~0xFF) != 0) {
+	write_log ("InvertRect: mask has high bits set!\n");
+    }
+    xorval = 0x01010101 * (mask & 0xFF);	/* the 8-bit mask repeated in all four bytes */
+    width_in_bytes = Bpp * Width;
+    uae_mem = ri.Memory + Y * ri.BytesPerRow + X * Bpp;
+
+    for (lines = 0; lines < Height; lines++, uae_mem += ri.BytesPerRow)	/* invert the frame buffer row by row */
+	do_xor8 (uae_mem, width_in_bytes, xorval);
+
+    if (renderinfo_is_current_screen (&ri)) {
+	if (mask == 0xFF)
+	    do_invertrect (&ri, Bpp, X, Y, Width, Height);
+	else
+	    do_blit (&ri, Bpp, X, Y, X, Y, Width, Height, BLIT_SRC, 0);
+    }
+
+    return 1;			/* 1 if supported, 0 otherwise */
+}
+
+/* Fill a Width x Height rectangle at (X, Y) with Pen in the Amiga-memory frame buffer described by ri.  */
+
+STATIC_INLINE void do_fillrect_frame_buffer (struct RenderInfo *ri, int X, int Y,
+					     int Width, int Height, uae_u32 Pen, int Bpp,
+					     RGBFTYPE RGBFormat)
+{
+    uae_u8 *start, *oldstart, *dst;
+    long lines, cols;
+
+    /* Do our virtual frame-buffer memory.  First, we do a single line fill by hand */
+    oldstart = start = ri->Memory + Y * ri->BytesPerRow + X * Bpp;	/* NOTE(review): this first row is written even when Height is 0 - confirm callers never pass that */
+    switch (Bpp) {
+    case 1:
+	memset (start, Pen, Width);
+	break;
+    case 2:
+	for (cols = 0; cols < Width; cols++) {
+	    do_put_mem_word ((uae_u16 *) start, Pen);
+	    start += 2;
+	}
+	break;
+    case 3:
+	for (cols = 0; cols < Width; cols++) {
+	    do_put_mem_byte (start, Pen & 0x000000FF);
+	    start++;
+	    *(uae_u16 *) (start) = (Pen & 0x00FFFF00) >> 8;	/* NOTE(review): raw host-order 16-bit store, unlike the do_put_mem_* helpers in the other cases - confirm byte order and alignment */
+	    start += 2;
+	}
+	break;
+    case 4:
+	for (cols = 0; cols < Width; cols++) {
+	    /**start = Pen; */
+	    do_put_mem_long ((uae_u32 *) start, Pen);
+	    start += 4;
+	}
+	break;
+    }
+
+    dst = oldstart + ri->BytesPerRow;	/* second row of the rectangle */
+    /* next, we do the remaining line fills via memcpy() for > 1 BPP, otherwise some more memset() calls */
+    if (Bpp > 1) {
+	for (lines = 0; lines < (Height - 1); lines++, dst += ri->BytesPerRow)
+	    memcpy (dst, oldstart, Width * Bpp);	/* replicate the first, hand-filled row */
+    } else {
+	for (lines = 0; lines < (Height - 1); lines++, dst += ri->BytesPerRow)
+	    memset (dst, Pen, Width);
+    }
+}
+
+
+/***********************************************************
+FillRect:
+***********************************************************
+* a0: 	struct BoardInfo *
+* a1:	struct RenderInfo *
+* d0: 	WORD X
+* d1: 	WORD Y
+* d2: 	WORD Width
+* d3: 	WORD Height
+* d4:	uae_u32 Pen
+* d5:	UBYTE Mask
+* d7:	uae_u32 RGBFormat
+***********************************************************/
+uae_u32 picasso_FillRect (void)
+{
+    uaecptr renderinfo = m68k_areg (regs, 1);
+    unsigned long X = (uae_u16) m68k_dreg (regs, 0);
+    unsigned long Y = (uae_u16) m68k_dreg (regs, 1);
+    unsigned long Width = (uae_u16) m68k_dreg (regs, 2);
+    unsigned long Height = (uae_u16) m68k_dreg (regs, 3);
+    uae_u32 Pen = m68k_dreg (regs, 4);
+    uae_u8 Mask = (uae_u8) m68k_dreg (regs, 5);
+    uae_u32 RGBFormat = m68k_dreg (regs, 7);
+
+    int Bpp;
+    struct RenderInfo ri;
+
+    wgfx_flushline ();
+
+    if (!CopyRenderInfoStructureA2U (renderinfo, &ri) || Y == 0xFFFF)
+	return 0;
+
+    Bpp = GetBytesPerPixel (RGBFormat);	/* must be set before the trace below reads it */
+
+    P96TRACE(("FillRect(%d, %d, %d, %d) Pen 0x%x BPP %d BPR %d Mask 0x%x\n",
+	      X, Y, Width, Height, Pen, Bpp, ri.BytesPerRow, Mask));
+
+    if (ri.RGBFormat != RGBFormat)
+	write_log ("Weird Stuff!\n");
+
+    /* Mask 0xFF: plain fill of the frame buffer, repeated on the screen when
+       this RenderInfo is the one currently being displayed.  */
+
+    if (Mask == 0xFF) {
+	do_fillrect_frame_buffer (&ri, X, Y, Width, Height, Pen, Bpp, RGBFormat);
+
+	if (renderinfo_is_current_screen (&ri))
+	    do_fillrect (ri.Memory + Y * ri.BytesPerRow + X * Bpp, X, Y,
+			 Width, Height, Pen, Bpp, RGBFormat);
+
+	return 1;
+    }
+
+    /* We get here only if Mask != 0xFF */
+    if (Bpp != 1) {
+	write_log ("Picasso: mask != 0xFF in truecolor mode!\n");
+	return 0;
+    }
+    Pen &= Mask;	/* bits of the pen that may be written */
+    Mask = ~Mask;	/* from here on: the bits of each pixel that must be kept */
+
+    {
+	uae_u8 *start = ri.Memory + Y * ri.BytesPerRow + X * Bpp;
+	uae_u8 *end = start + Height * ri.BytesPerRow;
+	for (; start != end; start += ri.BytesPerRow) {
+	    uae_u8 *p = start;
+	    unsigned long cols;
+	    for (cols = 0; cols < Width; cols++) {
+		uae_u32 tmpval = do_get_mem_byte (p + cols) & Mask;
+		do_put_mem_byte (p + cols, Pen | tmpval);
+	    }
+	}
+    }
+
+    if (renderinfo_is_current_screen (&ri))
+	do_blit (&ri, Bpp, X, Y, X, Y, Width, Height, BLIT_SRC, 0);
+
+    return 1;
+}
+
+/*
+ * BlitRect() copies a width x height pixel rectangle (any chunky pixel format) from ri to dstri.
+ * NOTE: If dstri is NULL, then we're only dealing with one RenderInfo area, and called from picasso_BlitRect()
+ */
+static void BlitRect (struct RenderInfo *ri, struct RenderInfo *dstri,
+		      unsigned long srcx, unsigned long srcy, unsigned long dstx, unsigned long dsty,
+		      unsigned long width, unsigned long height, uae_u8 mask, BLIT_OPCODE opcode)
+{
+    uae_u8 *src, *dst, *tmp, *tmp2, *tmp3;
+    unsigned long lines;
+    uae_u8 Bpp = GetBytesPerPixel (ri->RGBFormat);
+    uae_u8 *blitsrc;
+    unsigned long total_width = width * Bpp;	/* bytes per rectangle row */
+    unsigned long linewidth = (total_width + 15) & ~15;	/* row size rounded up to a multiple of 16; stride of the temp buffer */
+    int cant_blit = 1;
+
+    /*
+     * If we have no destination RenderInfo, then we're dealing with a single-buffer action, called
+     * from picasso_BlitRect(): dstri becomes ri and cant_blit is cleared, so the do_blit() call in
+     * the unmasked path below keeps the real source coordinates and gets 1 as its last argument.
+     *
+     * If we have a destination RenderInfo, then we've been called from picasso_BlitRectNoMaskComplete():
+     * cant_blit stays set, do_blit() gets the destination rectangle as its source and 0 as its last argument.
+     */
+    if (dstri == NULL) {
+	dstri = ri;
+	cant_blit = 0;
+    }
+
+    /* Do our virtual frame-buffer memory first */
+    src = ri->Memory + srcx * Bpp + srcy * ri->BytesPerRow;
+    dst = dstri->Memory + dstx * Bpp + dsty * dstri->BytesPerRow;
+    blitsrc = dst;	/* NOTE(review): blitsrc is never read in this function */
+    if (mask != 0xFF && Bpp > 1)
+	write_log ("ERROR - not obeying BlitRect() mask 0x%x properly with Bpp %d.\n", mask, Bpp);
+
+    if (mask == 0xFF || Bpp > 1) {
+	/* plain copy, row by row: memmove for equal rows of one buffer, else top-down or bottom-up */
+	if (ri->Memory == dstri->Memory && dsty == srcy) {
+	    unsigned long i;
+	    for (i = 0; i < height; i++, src += ri->BytesPerRow, dst += dstri->BytesPerRow)
+		memmove (dst, src, total_width);
+	} else if (dsty < srcy) {
+	    unsigned long i;
+	    for (i = 0; i < height; i++, src += ri->BytesPerRow, dst += dstri->BytesPerRow)
+		memcpy (dst, src, total_width);
+	} else {
+	    unsigned long i;
+	    src += (height - 1) * ri->BytesPerRow;
+	    dst += (height - 1) * dstri->BytesPerRow;
+	    for (i = 0; i < height; i++, src -= ri->BytesPerRow, dst -= dstri->BytesPerRow)
+		memcpy (dst, src, total_width);
+	}
+	if (cant_blit)
+	    srcx = dstx, srcy = dsty;
+	if (renderinfo_is_current_screen (dstri))
+	    do_blit (dstri, Bpp, srcx, srcy, dstx, dsty, width, height, opcode, !cant_blit);
+	return;
+    }
+
+    tmp3 = tmp2 = tmp = xmalloc (linewidth * height);	/* allocate enough memory for the src-rect */
+    if (!tmp)
+	return;
+
+    /* copy the src-rect into our temporary buffer space (this path is only reached with mask != 0xFF and Bpp <= 1) */
+    for (lines = 0; lines < height; lines++, src += ri->BytesPerRow, tmp2 += linewidth) {
+	memcpy (tmp2, src, total_width);
+    }
+
+    /* merge the temporary buffer into the destination: only the bits set in mask are replaced */
+    for (lines = 0; lines < height; lines++, dst += dstri->BytesPerRow, tmp += linewidth) {
+	unsigned long cols;
+	for (cols = 0; cols < width; cols++) {
+	    dst[cols] &= ~mask;
+	    dst[cols] |= tmp[cols] & mask;
+	}
+    }
+    if (renderinfo_is_current_screen (dstri))
+	do_blit (dstri, Bpp, dstx, dsty, dstx, dsty, width, height, opcode, 0);
+
+    /* free the temp-buf (tmp3 still holds the pointer returned by xmalloc) */
+    free (tmp3);
+
+}
+
+/***********************************************************
+BlitRect:
+***********************************************************
+* a0: 	struct BoardInfo
+* a1:	struct RenderInfo
+* d0: 	WORD SrcX
+* d1: 	WORD SrcY
+* d2: 	WORD DstX
+* d3: 	WORD DstY
+* d4:   WORD Width
+* d5:   WORD Height
+* d6:	UBYTE Mask
+* d7:	uae_u32 RGBFormat
+***********************************************************/
+uae_u32 picasso_BlitRect (void)
+{
+    /* Source/destination position and size come from the low words of
+       d0-d5, the mask from the low byte of d6, the RenderInfo from a1. */
+    struct RenderInfo info;
+    uaecptr info_addr = m68k_areg (regs, 1);
+    unsigned long sx = (uae_u16) m68k_dreg (regs, 0);
+    unsigned long sy = (uae_u16) m68k_dreg (regs, 1);
+    unsigned long dx = (uae_u16) m68k_dreg (regs, 2);
+    unsigned long dy = (uae_u16) m68k_dreg (regs, 3);
+    unsigned long w = (uae_u16) m68k_dreg (regs, 4);
+    unsigned long h = (uae_u16) m68k_dreg (regs, 5);
+    uae_u8 planemask = (uae_u8) m68k_dreg (regs, 6);
+
+    wgfx_flushline ();
+
+    /* Passing NULL as the destination makes BlitRect() copy within info. */
+    if (CopyRenderInfoStructureA2U (info_addr, &info)) {
+	P96TRACE(("BlitRect(%d, %d, %d, %d, %d, %d, 0x%x)\n", sx, sy, dx, dy, w, h, planemask));
+	BlitRect (&info, NULL, sx, sy, dx, dy, w, h, planemask, BLIT_SRC);
+	return 1;
+    }
+    return 0;
+}
+
+/***********************************************************
+BlitRectNoMaskComplete:
+***********************************************************
+* a0: 	struct BoardInfo
+* a1:	struct RenderInfo (src)
+* a2:   struct RenderInfo (dst)
+* d0: 	WORD SrcX
+* d1: 	WORD SrcY
+* d2: 	WORD DstX
+* d3: 	WORD DstY
+* d4:   WORD Width
+* d5:   WORD Height
+* d6:	UBYTE OpCode
+* d7:	uae_u32 RGBFormat
+* NOTE: MUST return 0 in D0 if we're not handling this operation
+*       because the RGBFormat or opcode aren't supported.
+*       OTHERWISE return 1
+***********************************************************/
+uae_u32 picasso_BlitRectNoMaskComplete (void)
+{
+    struct RenderInfo from, to;
+    uaecptr from_addr = m68k_areg (regs, 1);
+    uaecptr to_addr = m68k_areg (regs, 2);
+    unsigned long sx = (uae_u16) m68k_dreg (regs, 0);
+    unsigned long sy = (uae_u16) m68k_dreg (regs, 1);
+    unsigned long dx = (uae_u16) m68k_dreg (regs, 2);
+    unsigned long dy = (uae_u16) m68k_dreg (regs, 3);
+    unsigned long w = (uae_u16) m68k_dreg (regs, 4);
+    unsigned long h = (uae_u16) m68k_dreg (regs, 5);
+    uae_u8 op = m68k_dreg (regs, 6);
+    uae_u32 RGBFmt = m68k_dreg (regs, 7);	/* read but not used below */
+
+    wgfx_flushline ();
+
+    /* Both RenderInfo structures must be readable, source first. */
+    if (!CopyRenderInfoStructureA2U (from_addr, &from))
+	return 0;
+    if (!CopyRenderInfoStructureA2U (to_addr, &to))
+	return 0;
+
+    P96TRACE(("BlitRectNoMaskComplete() op 0x%2x, xy(%4d,%4d) --> xy(%4d,%4d), wh(%4d,%4d)\n",
+	op, sx, sy, dx, dy, w, h));
+
+    /* Only opcode 0x0C is handled here; everything else is declined
+       (FOR NOW!) by returning 0. */
+    if (op != 0x0C)
+	return 0;
+
+    BlitRect (&from, &to, sx, sy, dx, dy, w, h, 0xFF, op);
+    return 1;
+}
+
+/* Pixel-writing utility for BlitTemplate() and BlitPattern(): stores one byte-sized pixel, merged through mask unless mask is 0xFF */
+STATIC_INLINE void PixelWrite1 (uae_u8 * mem, int bits, uae_u32 fgpen, uae_u32 mask)
+{
+    uae_u8 *pixel = mem + bits;
+    uae_u32 value = mask == 0xFF ? fgpen : ((fgpen & mask) | (do_get_mem_byte (pixel) & ~mask));
+    do_put_mem_byte (pixel, value);
+}
+
+STATIC_INLINE void PixelWrite2 (uae_u8 * mem, int bits, uae_u32 fgpen)
+{
+    do_put_mem_word (&((uae_u16 *) mem)[bits], fgpen);	/* pixel number bits, counting in 16-bit units from mem */
+}
+
+STATIC_INLINE void PixelWrite3 (uae_u8 * mem, int bits, uae_u32 fgpen)
+{
+    do_put_mem_byte (mem + bits * 3, fgpen & 0x000000FF);	/* byte 0 of the 3-byte pixel: bits 0-7 of fgpen */
+    *(uae_u16 *) (mem + bits * 3 + 1) = (fgpen & 0x00FFFF00) >> 8;	/* bytes 1-2: bits 8-23, stored directly in host byte order; NOTE(review): PlanarToDirect() uses do_put_mem_word() for this step - confirm which is intended */
+}
+
+STATIC_INLINE void PixelWrite4 (uae_u8 * mem, int bits, uae_u32 fgpen)
+{
+    do_put_mem_long (&((uae_u32 *) mem)[bits], fgpen);	/* pixel number bits, counting in 32-bit units from mem */
+}
+
+STATIC_INLINE void PixelWrite (uae_u8 * mem, int bits, uae_u32 fgpen, uae_u8 Bpp, uae_u32 mask)
+{
+    /* Store fgpen as pixel number bits counted from mem; Bpp selects the
+       pixel size.  mask is honoured for 1-byte pixels only, and a Bpp
+       other than 1, 2, 3 or 4 stores nothing. */
+    if (Bpp == 1) {
+	uae_u8 *pixel = mem + bits;
+	if (mask != 0xFF)
+	    fgpen = (fgpen & mask) | (do_get_mem_byte (pixel) & ~mask);
+	do_put_mem_byte (pixel, fgpen);
+    } else if (Bpp == 2) {
+	do_put_mem_word (((uae_u16 *) mem) + bits, fgpen);
+    } else if (Bpp == 3) {
+	uae_u8 *pixel = mem + bits * 3;
+	do_put_mem_byte (pixel, fgpen & 0x000000FF);
+	*(uae_u16 *) (pixel + 1) = (fgpen & 0x00FFFF00) >> 8;
+    } else if (Bpp == 4) {
+	do_put_mem_long (((uae_u32 *) mem) + bits, fgpen);
+    }
+}
+
+/*
+ * BlitPattern:
+ * 
+ * Synopsis:BlitPattern(bi, ri, pattern, X, Y, Width, Height, Mask, RGBFormat);
+ * Inputs:
+ * a0:struct BoardInfo *bi
+ * a1:struct RenderInfo *ri
+ * a2:struct Pattern *pattern
+ * d0.w:X
+ * d1.w:Y
+ * d2.w:Width
+ * d3.w:Height
+ * d4.w:Mask
+ * d7.l:RGBFormat
+ * 
+ * This function is used to paint a pattern on the board memory using the blitter. It is called by
+ * BltPattern, if an AreaPtrn is used with positive AreaPtSz. The pattern consists of a b/w image
+ * using a single plane of image data which will be expanded repeatedly to the destination RGBFormat
+ * using ForeGround and BackGround pens as well as draw modes. The width of the pattern data is
+ * always 16 pixels (one word) and the height is calculated as 2^Size. The data must be shifted up
+ * and to the left by XOffset and YOffset pixels at the beginning.
+ */
+uae_u32 picasso_BlitPattern (void)
+{
+    uaecptr rinf = m68k_areg (regs, 1);
+    uaecptr pinf = m68k_areg (regs, 2);
+    unsigned long X = (uae_u16) m68k_dreg (regs, 0);
+    unsigned long Y = (uae_u16) m68k_dreg (regs, 1);
+    unsigned long W = (uae_u16) m68k_dreg (regs, 2);
+    unsigned long H = (uae_u16) m68k_dreg (regs, 3);
+    uae_u8 Mask = (uae_u8) m68k_dreg (regs, 4);
+    uae_u32 RGBFmt = m68k_dreg (regs, 7);
+
+    uae_u8 Bpp = GetBytesPerPixel (RGBFmt);	/* provisional; replaced below from ri.RGBFormat */
+    int inversion = 0;
+    struct RenderInfo ri;
+    struct Pattern pattern;
+    unsigned long rows;
+    uae_u32 fgpen;
+    uae_u8 *uae_mem;
+    int xshift;
+    unsigned long ysize_mask;
+
+    wgfx_flushline ();
+
+    if (! CopyRenderInfoStructureA2U (rinf, &ri)
+	|| !CopyPatternStructureA2U (pinf, &pattern))
+	return 0;
+
+    Bpp = GetBytesPerPixel (ri.RGBFormat);
+    uae_mem = ri.Memory + Y * ri.BytesPerRow + X * Bpp;	/* address of the first destination pixel (X,Y) */
+
+    if (pattern.DrawMode & INVERS)
+	inversion = 1;
+
+    pattern.DrawMode &= 0x03;	/* the low two bits select the mode switched on below */
+    if (Mask != 0xFF) {
+	if (Bpp > 1)
+	    write_log ("ERROR - not obeying BlitPattern() mask 0x%x properly with Bpp %d.\n", Mask, Bpp);
+	else if (pattern.DrawMode == COMP) {
+	    write_log ("ERROR - Unsupported Mask value 0x%x with COMP Draw in BlitPattern(), using fallback method.\n", Mask);
+	    return 0;
+	}
+    }
+
+    P96TRACE (("BlitPattern() xy(%d,%d), wh(%d,%d) draw 0x%x, off(%d,%d), ph %d\n",
+	       X, Y, W, H, pattern.DrawMode, pattern.XOffset, pattern.YOffset, 1 << pattern.Size));
+#ifdef _DEBUG
+    DumpPattern (&pattern);
+#endif
+    ysize_mask = (1 << pattern.Size) - 1;	/* the pattern has 1 << Size rows; used to wrap the row index */
+    xshift = pattern.XOffset & 15;	/* horizontal rotation of the 16-bit pattern row, 0..15 */
+
+    for (rows = 0; rows < H; rows++, uae_mem += ri.BytesPerRow) {
+	unsigned long prow = (rows + pattern.YOffset) & ysize_mask;
+	unsigned int d = do_get_mem_word (((uae_u16 *) pattern.Memory) + prow);
+	uae_u8 *uae_mem2 = uae_mem;
+	unsigned long cols;
+
+	if (xshift != 0)
+	    d = (d << xshift) | (d >> (16 - xshift));	/* rotate left by xshift; bits above bit 15 are never tested below */
+
+	for (cols = 0; cols < W; cols += 16, uae_mem2 += Bpp << 4) {
+	    long bits;
+	    long max = W - cols;	/* pixels left in this row, capped to one 16-pixel pattern repeat */
+	    unsigned int data = d;
+
+	    if (max > 16)
+		max = 16;
+
+	    for (bits = 0; bits < max; bits++) {
+		int bit_set = data & 0x8000;	/* leftmost remaining pattern bit */
+		data <<= 1;
+		switch (pattern.DrawMode) {
+		case JAM1:	/* write FgPen where the (possibly inverted) bit is set, leave other pixels alone */
+		    if (inversion)
+			bit_set = !bit_set;
+		    if (bit_set)
+			PixelWrite (uae_mem2, bits, pattern.FgPen, Bpp, Mask);
+		    break;
+		case JAM2:	/* write FgPen for set bits and BgPen for clear bits */
+		    if (inversion)
+			bit_set = !bit_set;
+		    if (bit_set)
+			PixelWrite (uae_mem2, bits, pattern.FgPen, Bpp, Mask);
+		    else
+			PixelWrite (uae_mem2, bits, pattern.BgPen, Bpp, Mask);
+		    break;
+		case COMP:	/* XOR FgPen into the pixel where the bit is set; inversion is not applied in this mode */
+		    if (bit_set) {
+			fgpen = pattern.FgPen;
+
+			switch (Bpp) {
+			case 1:
+			    {
+				uae_u8 *addr = uae_mem2 + bits;
+				do_put_mem_byte (addr, do_get_mem_byte (addr) ^ fgpen);
+			    }
+			    break;
+			case 2:
+			    {
+				uae_u16 *addr = ((uae_u16 *) uae_mem2) + bits;
+				do_put_mem_word (addr, do_get_mem_word (addr) ^ fgpen);
+			    }
+			    break;
+			case 3:
+			    {
+				uae_u32 *addr = (uae_u32 *) (uae_mem2 + bits * 3);	/* NOTE(review): long-sized access on a 3-byte pixel; presumably also covers the byte after it - confirm */
+				do_put_mem_long (addr, do_get_mem_long (addr) ^ (fgpen & 0x00FFFFFF));
+			    }
+			    break;
+			case 4:
+			    {
+				uae_u32 *addr = ((uae_u32 *) uae_mem2) + bits;
+				do_put_mem_long (addr, do_get_mem_long (addr) ^ fgpen);
+			    }
+			    break;
+			}
+		    }
+		    break;
+		}
+	    }
+	}
+    }
+
+    if (renderinfo_is_current_screen (&ri))
+	do_blit (&ri, Bpp, X, Y, X, Y, W, H, BLIT_SRC, 0);
+
+    return 1;
+}
+
+/*************************************************
+BlitTemplate:
+**************************************************
+* Synopsis: BlitTemplate(bi, ri, template, X, Y, Width, Height, Mask, RGBFormat);
+* a0: struct BoardInfo *bi
+* a1: struct RenderInfo *ri
+* a2: struct Template *template
+* d0.w: X
+* d1.w: Y
+* d2.w: Width
+* d3.w: Height
+* d4.w: Mask
+* d7.l: RGBFormat
+*
+* This function is used to paint a template on the board memory using the blitter.
+* It is called by BltPattern and BltTemplate. The template consists of a b/w image
+* using a single plane of image data which will be expanded to the destination RGBFormat
+* using ForeGround and BackGround pens as well as draw modes.
+***********************************************************************************/
+uae_u32 picasso_BlitTemplate (void)
+{
+    uae_u8 inversion = 0;
+    uaecptr rinf = m68k_areg (regs, 1);
+    uaecptr tmpl = m68k_areg (regs, 2);
+    unsigned long X = (uae_u16) m68k_dreg (regs, 0);
+    unsigned long Y = (uae_u16) m68k_dreg (regs, 1);
+    unsigned long W = (uae_u16) m68k_dreg (regs, 2);
+    unsigned long H = (uae_u16) m68k_dreg (regs, 3);
+    uae_u8 Mask = (uae_u8) m68k_dreg (regs, 4);	/* only the low byte is the mask, as in picasso_BlitPattern() */
+    struct Template tmp;
+    struct RenderInfo ri;
+    unsigned long rows;
+    int bitoffset;
+    uae_u32 fgpen;
+    uae_u8 *uae_mem, Bpp;
+    uae_u8 *tmpl_base;
+
+    wgfx_flushline ();
+
+    if (!CopyRenderInfoStructureA2U (rinf, &ri)
+	|| !CopyTemplateStructureA2U (tmpl, &tmp))
+	return 0;
+
+    Bpp = GetBytesPerPixel (ri.RGBFormat);
+    uae_mem = ri.Memory + Y * ri.BytesPerRow + X * Bpp;	/* address of the first destination pixel (X,Y) */
+
+    if (tmp.DrawMode & INVERS)
+	inversion = 1;
+
+    tmp.DrawMode &= 0x03;	/* the low two bits select the mode switched on below */
+    if (Mask != 0xFF) {
+	if (Bpp > 1)
+	    write_log ("ERROR - not obeying BlitTemplate() mask 0x%x properly with Bpp %d.\n", Mask, Bpp);
+	else if (tmp.DrawMode == COMP) {
+	    write_log ("ERROR - Unsupported Mask value 0x%x with COMP Draw in BlitTemplate(), using fallback method.\n", Mask);
+	    return 0;
+	}
+    }
+
+    P96TRACE (("BlitTemplate() xy(%d,%d), wh(%d,%d) draw 0x%x fg 0x%x bg 0x%x \n",
+	       X, Y, W, H, tmp.DrawMode, tmp.FgPen, tmp.BgPen));
+
+    bitoffset = tmp.XOffset % 8;	/* bit position of the first pixel inside its template byte */
+
+#ifdef _DEBUG
+    DumpTemplate (&tmp, W, H);
+#endif
+
+    tmpl_base = tmp.Memory + tmp.XOffset / 8;	/* template byte holding the first pixel of the current row */
+
+    for (rows = 0; rows < H; rows++, uae_mem += ri.BytesPerRow, tmpl_base += tmp.BytesPerRow) {
+	unsigned long cols;
+	uae_u8 *tmpl_mem = tmpl_base;
+	uae_u8 *uae_mem2 = uae_mem;
+	unsigned int data = *tmpl_mem;
+
+	for (cols = 0; cols < W; cols += 8, uae_mem2 += Bpp << 3) {
+	    unsigned int byte;
+	    long bits;
+	    long max = W - cols;	/* pixels left in this row, capped to 8 per pass */
+
+	    if (max > 8)
+		max = 8;
+
+	    data <<= 8;	/* sliding window: previous template byte above the next one */
+	    data |= *++tmpl_mem;	/* NOTE(review): the following byte is fetched on every pass, even when bitoffset is 0 */
+
+	    byte = data >> (8 - bitoffset);	/* align so that bit 7 of the low byte is the current pixel */
+
+	    for (bits = 0; bits < max; bits++) {
+		int bit_set = (byte & 0x80);
+		byte <<= 1;
+		switch (tmp.DrawMode) {
+		case JAM1:	/* write FgPen where the (possibly inverted) bit is set, leave other pixels alone */
+		    if (inversion)
+			bit_set = !bit_set;
+		    if (bit_set) {
+			fgpen = tmp.FgPen;
+			PixelWrite (uae_mem2, bits, fgpen, Bpp, Mask);
+		    }
+		    break;
+		case JAM2:	/* write FgPen for set bits and BgPen for clear bits */
+		    if (inversion)
+			bit_set = !bit_set;
+		    fgpen = tmp.BgPen;
+		    if (bit_set)
+			fgpen = tmp.FgPen;
+
+		    PixelWrite (uae_mem2, bits, fgpen, Bpp, Mask);
+		    break;
+		case COMP:	/* XOR FgPen into the pixel where the bit is set; inversion is not applied in this mode */
+		    if (bit_set) {
+			fgpen = tmp.FgPen;
+
+			switch (Bpp) {
+			case 1:
+			    {
+				uae_u8 *addr = uae_mem2 + bits;
+				do_put_mem_byte (addr, do_get_mem_byte (addr) ^ fgpen);
+			    }
+			    break;
+			case 2:
+			    {
+				uae_u16 *addr = ((uae_u16 *) uae_mem2) + bits;
+				do_put_mem_word (addr, do_get_mem_word (addr) ^ fgpen);
+			    }
+			    break;
+			case 3:
+			    {
+				uae_u32 *addr = (uae_u32 *) (uae_mem2 + bits * 3);	/* NOTE(review): long-sized access on a 3-byte pixel; presumably also covers the byte after it - confirm */
+				do_put_mem_long (addr, do_get_mem_long (addr) ^ (fgpen & 0x00FFFFFF));
+			    }
+			    break;
+			case 4:
+			    {
+				uae_u32 *addr = ((uae_u32 *) uae_mem2) + bits;
+				do_put_mem_long (addr, do_get_mem_long (addr) ^ fgpen);
+			    }
+			    break;
+			}
+		    }
+		    break;
+		}
+	    }
+	}
+    }
+
+    if (renderinfo_is_current_screen (&ri))
+	do_blit (&ri, Bpp, X, Y, X, Y, W, H, BLIT_SRC, 0);
+
+    return 1;
+}
+
+/*
+ * CalculateBytesPerRow:
+ * a0: 	struct BoardInfo
+ * d0: 	uae_u16 Width
+ * d7:	RGBFTYPE RGBFormat
+ * This function calculates the amount of bytes needed for a line of
+ * "Width" pixels in the given RGBFormat and returns it as a 32-bit value.
+ */
+uae_u32 picasso_CalculateBytesPerRow (void)
+{
+    uae_u32 width = (uae_u16) m68k_dreg (regs, 0);	/* only the low word of d0 is the width */
+    uae_u32 type = m68k_dreg (regs, 7);
+
+    /* Multiply in 32 bits: a 16-bit result would be truncated above 65535. */
+    width = GetBytesPerPixel (type) * width;
+    P96TRACE (("CalculateBytesPerRow() = %d\n", width));
+    return width;
+}
+
+/*
+ * SetDisplay:
+ * a0:	struct BoardInfo
+ * d0:	BOOL state
+ * Called to switch the video display on or off.  Nothing is switched
+ * here; the request is only traced.
+ * NOTE: the return value is the logical negation of the state
+ */
+uae_u32 picasso_SetDisplay (void)
+{
+    uae_u32 requested = m68k_dreg (regs, 0);
+    P96TRACE (("SetDisplay(%d)\n", requested));
+    return requested == 0;
+}
+
+/*
+ * WaitVerticalSync:
+ * a0:	struct BoardInfo
+ * Entry point for waiting on the next vertical retrace; this implementation does not wait and returns 1 at once.
+ */
+uae_u32 picasso_WaitVerticalSync (void)
+{
+    /*write_log ("WaitVerticalSync()\n"); */
+    return 1;
+}
+
+/* Converts a rectangle of the planar bitmap bm into one byte per pixel at ri. NOTE: Watch for those planeptrs of 0x00000000 and 0xFFFFFFFF for all zero / all one bitmaps !!!! */
+static void PlanarToChunky (struct RenderInfo *ri, struct BitMap *bm,
+			    unsigned long srcx, unsigned long srcy,
+			    unsigned long dstx, unsigned long dsty, unsigned long width, unsigned long height, uae_u8 mask)
+{
+    int j;
+
+    uae_u8 *PLANAR[8], *image = ri->Memory + dstx * GetBytesPerPixel (ri->RGBFormat) + dsty * ri->BytesPerRow;
+    int Depth = bm->Depth;
+    unsigned long rows, bitoffset = srcx & 7;
+    long eol_offset;
+
+    /* Plane k supplies bit k of every destination byte; a plane whose bit is
+       clear in mask is replaced by the all-zeros marker and contributes 0. */
+
+    /* Set up our bm->Planes[] pointers to the right horizontal offset */
+    for (j = 0; j < Depth; j++) {
+	uae_u8 *p = bm->Planes[j];
+	if (p != &all_zeros_bitmap && p != &all_ones_bitmap)
+	    p += srcx / 8 + srcy * bm->BytesPerRow;
+	PLANAR[j] = p;
+	if ((mask & (1 << j)) == 0)
+	    PLANAR[j] = &all_zeros_bitmap;
+    }
+    eol_offset = (long) bm->BytesPerRow - (long) ((width + 7) >> 3);	/* plane row stride minus the bytes stepped over per row (one per 8 pixels) */
+    for (rows = 0; rows < height; rows++, image += ri->BytesPerRow) {
+	unsigned long cols;
+
+	for (cols = 0; cols < width; cols += 8) {
+	    int k;
+	    uae_u32 a = 0, b = 0;	/* chunky pixels 0-3 and 4-7 of this group */
+	    unsigned int msk = 0xFF;
+	    long tmp = cols + 8 - width;	/* > 0: this group of 8 extends tmp pixels past the right edge */
+	    if (tmp > 0) {
+		msk <<= tmp;	/* drop the plane bits of the pixels beyond the edge */
+		b = do_get_mem_long ((uae_u32 *) (image + cols + 4));	/* start from the existing destination bytes so those beyond the edge survive */
+		if (tmp < 4)
+		    b &= 0xFFFFFFFF >> (32 - tmp * 8);
+		else if (tmp > 4) {
+		    a = do_get_mem_long ((uae_u32 *) (image + cols));
+		    a &= 0xFFFFFFFF >> (64 - tmp * 8);
+		}
+	    }
+	    for (k = 0; k < Depth; k++) {
+		unsigned int data;
+		if (PLANAR[k] == &all_zeros_bitmap)
+		    data = 0;
+		else if (PLANAR[k] == &all_ones_bitmap)
+		    data = 0xFF;
+		else {
+		    data = (uae_u8) (do_get_mem_word ((uae_u16 *) PLANAR[k]) >> (8 - bitoffset));	/* 8 plane bits starting bitoffset bits into the current byte */
+		    PLANAR[k]++;
+		}
+		data &= msk;
+		a |= p2ctab[data][0] << k;	/* p2ctab spreads the upper four bits of data over the four bytes of a long */
+		b |= p2ctab[data][1] << k;	/* ... and the lower four bits likewise */
+	    }
+	    do_put_mem_long ((uae_u32 *) (image + cols), a);
+	    do_put_mem_long ((uae_u32 *) (image + cols + 4), b);
+	}
+	for (j = 0; j < Depth; j++) {
+	    if (PLANAR[j] != &all_zeros_bitmap && PLANAR[j] != &all_ones_bitmap) {
+		PLANAR[j] += eol_offset;	/* advance the real plane pointers to the start of the next source row */
+	    }
+	}
+    }
+}
+
+/*
+ * BlitPlanar2Chunky:
+ * a0: struct BoardInfo *bi
+ * a1: struct BitMap *bm - source containing planar information and assorted details
+ * a2: struct RenderInfo *ri - dest area and its details
+ * d0.w: SrcX
+ * d1.w: SrcY
+ * d2.w: DstX
+ * d3.w: DstY
+ * d4.w: SizeX
+ * d5.w: SizeY
+ * d6.b: MinTerm - uh oh!
+ * d7.b: Mask - uh oh!
+ *
+ * This function is currently used to blit from planar bitmaps within system memory to chunky bitmaps
+ * on the board. Watch out for plane pointers that are 0x00000000 (represents a plane with all bits "0")
+ * or 0xffffffff (represents a plane with all bits "1").
+ */
+uae_u32 picasso_BlitPlanar2Chunky (void)
+{
+    struct BitMap planar;
+    struct RenderInfo chunky_ri;
+    uaecptr bm_addr = m68k_areg (regs, 1);
+    uaecptr ri_addr = m68k_areg (regs, 2);
+    unsigned long sx = (uae_u16) m68k_dreg (regs, 0);
+    unsigned long sy = (uae_u16) m68k_dreg (regs, 1);
+    unsigned long dx = (uae_u16) m68k_dreg (regs, 2);
+    unsigned long dy = (uae_u16) m68k_dreg (regs, 3);
+    unsigned long w = (uae_u16) m68k_dreg (regs, 4);
+    unsigned long h = (uae_u16) m68k_dreg (regs, 5);
+    uae_u8 minterm = m68k_dreg (regs, 6) & 0xFF;
+    uae_u8 planemask = m68k_dreg (regs, 7) & 0xFF;
+
+    wgfx_flushline ();
+    /* Any minterm other than 0x0C is logged and declined by returning 0. */
+    if (minterm != 0x0C) {
+	write_log ("ERROR - BlitPlanar2Chunky() has minterm 0x%x, which I don't handle. Using fall-back routine.\n", minterm);
+	return 0;
+    }
+    if (!CopyRenderInfoStructureA2U (ri_addr, &chunky_ri))
+	return 0;
+    if (!CopyBitMapStructureA2U (bm_addr, &planar))
+	return 0;
+
+    P96TRACE (("BlitPlanar2Chunky(%d, %d, %d, %d, %d, %d) Minterm 0x%x, Mask 0x%x, Depth %d\n",
+	       sx, sy, dx, dy, w, h, minterm, planemask, planar.Depth));
+    P96TRACE (("P2C - BitMap has %d BPR, %d rows\n", planar.BytesPerRow, planar.Rows));
+    PlanarToChunky (&chunky_ri, &planar, sx, sy, dx, dy, w, h, planemask);
+    if (renderinfo_is_current_screen (&chunky_ri))
+	do_blit (&chunky_ri, GetBytesPerPixel (chunky_ri.RGBFormat), dx, dy, dx, dy, w, h, BLIT_SRC, 0);
+    return 1;
+}
+
+static void PlanarToDirect (struct RenderInfo *ri, struct BitMap *bm,
+			    unsigned long srcx, unsigned long srcy,
+			    unsigned long dstx, unsigned long dsty,
+			    unsigned long width, unsigned long height, uae_u8 mask, struct ColorIndexMapping *cim)
+{
+    int j;
+    int bpp = GetBytesPerPixel (ri->RGBFormat);
+    uae_u8 *PLANAR[8];
+    uae_u8 *image = ri->Memory + dstx * bpp + dsty * ri->BytesPerRow;	/* first destination pixel (dstx,dsty) */
+    int Depth = bm->Depth;
+    unsigned long rows;
+    long eol_offset;
+
+    /* Set up our bm->Planes[] pointers to the right horizontal offset */
+    for (j = 0; j < Depth; j++) {
+	uae_u8 *p = bm->Planes[j];
+	if (p != &all_zeros_bitmap && p != &all_ones_bitmap)
+	    p += srcx / 8 + srcy * bm->BytesPerRow;
+	PLANAR[j] = p;
+	if ((mask & (1 << j)) == 0)
+	    PLANAR[j] = &all_zeros_bitmap;	/* planes masked out are treated as all-zero */
+    }
+
+    eol_offset = (long) bm->BytesPerRow - (long) ((width + (srcx & 7)) >> 3);	/* plane row stride minus the whole bytes stepped over per row */
+    for (rows = 0; rows < height; rows++, image += ri->BytesPerRow) {
+	unsigned long cols;
+	uae_u8 *image2 = image;
+	unsigned int bitoffs = 7 - (srcx & 7);	/* bit number of the current pixel inside the plane byte, counting down from the leftmost */
+	int i;
+
+	for (cols = 0; cols < width; cols++) {
+	    int v = 0, k;	/* v collects the colour index, bit k coming from plane k */
+	    for (k = 0; k < Depth; k++) {
+		if (PLANAR[k] == &all_ones_bitmap)
+		    v |= 1 << k;
+		else if (PLANAR[k] != &all_zeros_bitmap) {
+		    v |= ((*PLANAR[k] >> bitoffs) & 1) << k;
+		}
+	    }
+
+	    switch (bpp) {	/* store cim->Colors[v] in the destination pixel size; other sizes (including 1) store nothing */
+	    case 2:
+		do_put_mem_word ((uae_u16 *) image2, cim->Colors[v]);
+		image2 += 2;
+		break;
+	    case 3:
+		do_put_mem_byte (image2++, cim->Colors[v] & 0x000000FF);
+		do_put_mem_word ((uae_u16 *) image2, (cim->Colors[v] & 0x00FFFF00) >> 8);
+		image2 += 2;
+		break;
+	    case 4:
+		do_put_mem_long ((uae_u32 *) image2, cim->Colors[v]);
+		image2 += 4;
+		break;
+	    }
+	    bitoffs--;
+	    bitoffs &= 7;
+	    if (bitoffs == 7) {	/* wrapped past bit 0: step every real plane pointer to its next byte */
+		int k;
+		for (k = 0; k < Depth; k++) {
+		    if (PLANAR[k] != &all_zeros_bitmap && PLANAR[k] != &all_ones_bitmap) {
+			PLANAR[k]++;
+		    }
+		}
+	    }
+	}
+
+	for (i = 0; i < Depth; i++) {
+	    if (PLANAR[i] != &all_zeros_bitmap && PLANAR[i] != &all_ones_bitmap) {
+		PLANAR[i] += eol_offset;	/* move on to the start of the next source row */
+	    }
+	}
+    }
+}
+
+/*
+ * BlitPlanar2Direct: 
+ * 
+ * Synopsis:
+ * BlitPlanar2Direct(bi, bm, ri, cim, SrcX, SrcY, DstX, DstY, SizeX, SizeY, MinTerm, Mask);
+ * Inputs:
+ * a0:struct BoardInfo *bi
+ * a1:struct BitMap *bm
+ * a2:struct RenderInfo *ri
+ * a3:struct ColorIndexMapping *cim
+ * d0.w:SrcX
+ * d1.w:SrcY
+ * d2.w:DstX
+ * d3.w:DstY
+ * d4.w:SizeX
+ * d5.w:SizeY
+ * d6.b:MinTerm
+ * d7.b:Mask
+ * 
+ * This function is currently used to blit from planar bitmaps within system memory to direct color
+ * bitmaps (15, 16, 24 or 32 bit) on the board. Watch out for plane pointers that are 0x00000000 (represents
+ * a plane with all bits "0") or 0xffffffff (represents a plane with all bits "1"). The ColorIndexMapping is
+ * used to map the color index of each pixel formed by the bits in the bitmap's planes to a direct color value
+ * which is written to the destination RenderInfo. The color mask and all colors within the mapping are words,
+ * triple bytes or longwords respectively similar to the color values used in FillRect(), BlitPattern() or
+ * BlitTemplate(). 
+ */
+uae_u32 picasso_BlitPlanar2Direct (void)
+{
+    struct BitMap planar;
+    struct RenderInfo direct_ri;
+    struct ColorIndexMapping colours;
+    uaecptr bm_addr = m68k_areg (regs, 1);
+    uaecptr ri_addr = m68k_areg (regs, 2);
+    uaecptr cim_addr = m68k_areg (regs, 3);
+    unsigned long sx = (uae_u16) m68k_dreg (regs, 0);
+    unsigned long sy = (uae_u16) m68k_dreg (regs, 1);
+    unsigned long dx = (uae_u16) m68k_dreg (regs, 2);
+    unsigned long dy = (uae_u16) m68k_dreg (regs, 3);
+    unsigned long w = (uae_u16) m68k_dreg (regs, 4);
+    unsigned long h = (uae_u16) m68k_dreg (regs, 5);
+    uae_u8 minterm = m68k_dreg (regs, 6);
+    uae_u8 planemask = m68k_dreg (regs, 7);
+
+    wgfx_flushline ();
+    /* Anything but minterm 0x0C with a full mask is logged and declined (0). */
+    if (minterm != 0x0C) {
+	write_log ("ERROR - BlitPlanar2Direct() has op-code 0x%x, which I don't handle. Using fall-back routine.\n", minterm);
+	return 0;
+    }
+    if (planemask != 0xFF) {
+	write_log ("ERROR - Unsupported Mask value 0x%x in BlitPlanar2Direct(), using fallback method.\n", planemask);
+	return 0;
+    }
+    if (!CopyRenderInfoStructureA2U (ri_addr, &direct_ri))
+	return 0;
+    if (!CopyBitMapStructureA2U (bm_addr, &planar))
+	return 0;
+    CopyColorIndexMappingA2U (cim_addr, &colours);
+    P96TRACE (("BlitPlanar2Direct(%d, %d, %d, %d, %d, %d) Minterm 0x%x, Mask 0x%x, Depth %d\n",
+       sx, sy, dx, dy, w, h, minterm, planemask, planar.Depth));
+    PlanarToDirect (&direct_ri, &planar, sx, sy, dx, dy, w, h, planemask, &colours);
+    if (renderinfo_is_current_screen (&direct_ri))
+	do_blit (&direct_ri, GetBytesPerPixel (direct_ri.RGBFormat), dx, dy, dx, dy, w, h, BLIT_SRC, 0);
+    return 1;
+}
+
+/* @@@ - Work to be done here!
+ *
+ * The address is the offset into our Picasso96 frame-buffer (pointed to by gfxmem_start)
+ * where the value was put.
+ *
+ * Porting work: on some machines you may not need these functions, ie. if the memory for the
+ * Picasso96 frame-buffer is directly viewable or directly blittable.  On Win32 with DirectX,
+ * this is not the case.  So I provide some write-through functions (as per Mathias' orders!)
+ */
+static void write_gfx_long (uaecptr addr, uae_u32 value)
+{
+    const uaecptr first = addr;	/* offset of the write, as passed in */
+    const uaecptr last = addr + 4;	/* one past its final byte */
+    int row;
+
+    if (!picasso_on)
+	return;
+
+    /* Consecutive writes usually land on the line that is already being
+       tracked; in that case only wgfx_min / wgfx_max need widening. */
+    if (first >= wgfx_linestart && last <= wgfx_lineend) {
+	if (first < wgfx_min)
+	    wgfx_min = first;
+	if (last > wgfx_max)
+	    wgfx_max = last;
+	return;
+    }
+    /* A different line (or none so far): flush what has been collected. */
+    wgfx_flushline ();
+
+    /* Writes outside the displayed area ("offscreen") are not tracked. */
+    addr += gfxmem_start;
+    if (addr < picasso96_state.Address || addr + 4 > picasso96_state.Extent)
+	return;
+
+    addr -= picasso96_state.Address;
+    row = addr / picasso96_state.BytesPerRow;
+    if (row >= picasso96_state.VirtualHeight)
+	return;
+
+    /* Start tracking the line that contains this write. */
+    wgfx_y = row;
+    wgfx_linestart = picasso96_state.Address - gfxmem_start + row * picasso96_state.BytesPerRow;
+    wgfx_lineend = wgfx_linestart + picasso96_state.BytesPerRow;
+    wgfx_min = first;
+    wgfx_max = last;
+}
+
+static void write_gfx_word (uaecptr addr, uae_u16 value)
+{
+    const uaecptr first = addr;	/* offset of the write, as passed in */
+    const uaecptr last = addr + 2;	/* one past its final byte */
+    int row;
+
+    if (!picasso_on)
+	return;
+
+    /* Consecutive writes usually land on the line that is already being
+       tracked; in that case only wgfx_min / wgfx_max need widening. */
+    if (first >= wgfx_linestart && last <= wgfx_lineend) {
+	if (first < wgfx_min)
+	    wgfx_min = first;
+	if (last > wgfx_max)
+	    wgfx_max = last;
+	return;
+    }
+    /* A different line (or none so far): flush what has been collected. */
+    wgfx_flushline ();
+
+    /* Writes outside the displayed area ("offscreen") are not tracked. */
+    addr += gfxmem_start;
+    if (addr < picasso96_state.Address || addr + 2 > picasso96_state.Extent)
+	return;
+
+    addr -= picasso96_state.Address;
+    row = addr / picasso96_state.BytesPerRow;
+    if (row >= picasso96_state.VirtualHeight)
+	return;
+
+    /* Start tracking the line that contains this write. */
+    wgfx_y = row;
+    wgfx_linestart = picasso96_state.Address - gfxmem_start + row * picasso96_state.BytesPerRow;
+    wgfx_lineend = wgfx_linestart + picasso96_state.BytesPerRow;
+    wgfx_min = first;
+    wgfx_max = last;
+}
+
+static void write_gfx_byte (uaecptr addr, uae_u8 value)
+{
+    const uaecptr first = addr;	/* offset of the write, as passed in */
+    const uaecptr last = addr + 1;	/* one past the byte written */
+    int row;
+
+    if (!picasso_on)
+	return;
+
+    /* Fast path for a write inside the line already being tracked.  The
+       bound is the byte itself (addr + 1), not the long-sized addr + 4. */
+    if (first >= wgfx_linestart && last <= wgfx_lineend) {
+	if (first < wgfx_min)
+	    wgfx_min = first;
+	if (last > wgfx_max)
+	    wgfx_max = last;
+	return;
+    }
+    /* A different line (or none so far): flush what has been collected. */
+    wgfx_flushline ();
+
+    /* Writes outside the displayed area ("offscreen") are not tracked. */
+    addr += gfxmem_start;
+    if (addr < picasso96_state.Address || addr + 1 > picasso96_state.Extent)
+	return;
+
+    addr -= picasso96_state.Address;
+    row = addr / picasso96_state.BytesPerRow;
+    if (row >= picasso96_state.VirtualHeight)
+	return;
+
+    /* Start tracking the line that contains this write. */
+    wgfx_y = row;
+    wgfx_linestart = picasso96_state.Address - gfxmem_start + row * picasso96_state.BytesPerRow;
+    wgfx_lineend = wgfx_linestart + picasso96_state.BytesPerRow;
+    wgfx_min = first;
+    wgfx_max = last;
+}
+
+static uae_u32 REGPARAM2 gfxmem_lget (uaecptr addr)
+{
+    /* Reduce the address to an offset inside the gfx memory block. */
+    uaecptr offset = (addr - (gfxmem_start & gfxmem_mask)) & gfxmem_mask;
+
+    /* Fetch the long word stored at that offset. */
+    return do_get_mem_long ((uae_u32 *) (gfxmemory + offset));
+}
+
+static uae_u32 REGPARAM2 gfxmem_wget (uaecptr addr)
+{
+    /* Reduce the address to an offset inside the gfx memory block. */
+    uaecptr offset = (addr - (gfxmem_start & gfxmem_mask)) & gfxmem_mask;
+
+    /* Fetch the word stored at that offset. */
+    return do_get_mem_word ((uae_u16 *) (gfxmemory + offset));
+}
+
+static uae_u32 REGPARAM2 gfxmem_bget (uaecptr addr)
+{
+    /* Byte read: index gfxmemory with the masked, start-relative offset. */
+    uaecptr offset = (addr - (gfxmem_start & gfxmem_mask)) & gfxmem_mask;
+    return *(gfxmemory + offset);
+}
+
+static void REGPARAM2 gfxmem_lput (uaecptr addr, uae_u32 l)
+{
+    uaecptr offset = (addr - (gfxmem_start & gfxmem_mask)) & gfxmem_mask;
+
+    /* Store into the gfx memory itself first ... */
+    do_put_mem_long ((uae_u32 *) (gfxmemory + offset), l);
+
+    /* ... then report the write to write_gfx_long(), which keeps track
+       of it for the display. */
+    write_gfx_long (offset, l);
+}
+
+static void REGPARAM2 gfxmem_wput (uaecptr addr, uae_u32 w)
+{
+    uaecptr offset = (addr - (gfxmem_start & gfxmem_mask)) & gfxmem_mask;
+
+    /* Store the low word into the gfx memory itself first ... */
+    do_put_mem_word ((uae_u16 *) (gfxmemory + offset), (uae_u16) w);
+
+    /* ... then report the write to write_gfx_word(), which keeps track
+       of it for the display. */
+    write_gfx_word (offset, (uae_u16) w);
+}
+
+static void REGPARAM2 gfxmem_bput (uaecptr addr, uae_u32 b)
+{
+    uaecptr offset = (addr - (gfxmem_start & gfxmem_mask)) & gfxmem_mask;
+
+    /* Store the byte into the gfx memory itself first ... */
+    *(gfxmemory + offset) = b;
+    /* ... then report the write to write_gfx_byte() for the display. */
+    write_gfx_byte (offset, (uae_u8) b);
+}
+
+static int REGPARAM2 gfxmem_check (uaecptr addr, uae_u32 size)
+{
+    /* Nonzero when the size bytes at addr lie inside the allocated gfx memory; a range ending exactly at its end is accepted. */
+    uaecptr offset = (addr - (gfxmem_start & gfxmem_mask)) & gfxmem_mask;
+    return (offset + size) <= allocated_gfxmem;
+}
+
+static uae_u8 REGPARAM2 *gfxmem_xlate (uaecptr addr)
+{
+    /* Translate addr into a host pointer inside gfxmemory. */
+    uaecptr offset = (addr - (gfxmem_start & gfxmem_mask)) & gfxmem_mask;
+    return &gfxmemory[offset];
+}
+
+addrbank gfxmem_bank = {	/* access handlers for the gfx memory, all defined above */
+    gfxmem_lget, gfxmem_wget, gfxmem_bget,	/* long / word / byte reads */
+    gfxmem_lput, gfxmem_wput, gfxmem_bput,	/* long / word / byte writes */
+    gfxmem_xlate, gfxmem_check, NULL	/* address translation, range check; the last member is left NULL */
+};
+
+int picasso_display_mode_index (uae_u32 x, uae_u32 y, uae_u32 d)
+{
+    /* Linear search of DisplayModes[] for an exact width/height/depth match;
+       yields the index of the first match, or -1 when the search runs out. */
+    int idx = 0;
+    while (idx < mode_count) {
+	if (DisplayModes[idx].res.width == x && DisplayModes[idx].res.height == y
+	    && DisplayModes[idx].depth == d)
+	    break;
+	idx++;
+    }
+    return idx == mode_count ? -1 : idx;
+}
+
+static int resolution_compare (const void *a, const void *b)
+{
+    /* qsort() comparator for struct PicassoResolution: wider modes sort
+       first, then taller ones; equal sizes are ordered by rising depth. */
+    struct PicassoResolution *lhs = (struct PicassoResolution *) a;
+    struct PicassoResolution *rhs = (struct PicassoResolution *) b;
+
+    if (lhs->res.width != rhs->res.width)
+	return lhs->res.width > rhs->res.width ? -1 : 1;
+    if (lhs->res.height != rhs->res.height)
+	return lhs->res.height > rhs->res.height ? -1 : 1;
+
+    return lhs->depth - rhs->depth;
+}
+
+/* Call this function first, near the beginning of code flow
+ * NOTE: Don't stuff it in InitGraphics() which seems reasonable...
+ * Instead, put it in customreset() for safe-keeping.  */
+void InitPicasso96 (void)
+{
+    static int first_time = 1;
+
+    memset (&picasso96_state, 0, sizeof (struct picasso96_state_struct));	/* the state is cleared on every call */
+
+    if (first_time) {	/* the table and the mode list are built on the first call only */
+	int i;
+
+	for (i = 0; i < 256; i++) {
+	    p2ctab[i][0] = (((i & 128) ? 0x01000000 : 0)	/* bits 7..4 of i, one per byte of the long, bit 7 in the top byte */
+			    | ((i & 64) ? 0x010000 : 0)
+			    | ((i & 32) ? 0x0100 : 0)
+			    | ((i & 16) ? 0x01 : 0));
+	    p2ctab[i][1] = (((i & 8) ? 0x01000000 : 0)	/* bits 3..0 of i, laid out the same way */
+			    | ((i & 4) ? 0x010000 : 0)
+			    | ((i & 2) ? 0x0100 : 0)
+			    | ((i & 1) ? 0x01 : 0));
+	}
+	mode_count = DX_FillResolutions (&picasso96_pixel_format);
+	qsort (DisplayModes, mode_count, sizeof (struct PicassoResolution), resolution_compare);
+
+	for (i = 0; i < mode_count; i++) {
+	    sprintf (DisplayModes[i].name, "%dx%d, %d-bit, %d Hz",	/* NOTE(review): unbounded sprintf; the size of name is not visible here */
+		     DisplayModes[i].res.width, DisplayModes[i].res.height, DisplayModes[i].depth * 8, DisplayModes[i].refresh);
+	    switch (DisplayModes[i].depth) {	/* per depth, remember the largest width and the largest height seen */
+	    case 1:
+		if (DisplayModes[i].res.width > chunky.width)
+		    chunky.width = DisplayModes[i].res.width;
+		if (DisplayModes[i].res.height > chunky.height)
+		    chunky.height = DisplayModes[i].res.height;
+		break;
+	    case 2:
+		if (DisplayModes[i].res.width > hicolour.width)
+		    hicolour.width = DisplayModes[i].res.width;
+		if (DisplayModes[i].res.height > hicolour.height)
+		    hicolour.height = DisplayModes[i].res.height;
+		break;
+	    case 3:
+		if (DisplayModes[i].res.width > truecolour.width)
+		    truecolour.width = DisplayModes[i].res.width;
+		if (DisplayModes[i].res.height > truecolour.height)
+		    truecolour.height = DisplayModes[i].res.height;
+		break;
+	    case 4:
+		if (DisplayModes[i].res.width > alphacolour.width)
+		    alphacolour.width = DisplayModes[i].res.width;
+		if (DisplayModes[i].res.height > alphacolour.height)
+		    alphacolour.height = DisplayModes[i].res.height;
+		break;
+	    }
+	}
+	ShowSupportedResolutions ();
+
+	first_time = 0;
+    }
+}
+
+#endif
diff -urN src-0.8.22/src/sdlgfx.c~ src-0.8.22-mmu/src/sdlgfx.c~
--- src-0.8.22/src/sdlgfx.c~	1970-01-01 01:00:00.000000000 +0100
+++ src-0.8.22-mmu/src/sdlgfx.c~	2002-02-20 19:33:39.000000000 +0100
@@ -0,0 +1,1247 @@
+ /*
+  * UAE - The Un*x Amiga Emulator
+  *
+  * SDL interface
+  *
+  * Copyright 2001 Bernd Lachner (EMail: dev@lachner-net.de)
+  *
+  * Partially based on the UAE X interface (xwin.c)
+  *
+  * Copyright 1995, 1996 Bernd Schmidt
+  * Copyright 1996 Ed Hanway, Andre Beck, Samuel Devulder, Bruno Coste
+  * Copyright 1998 Marcus Sundberg
+  * DGA support by Kai Kollmorgen
+  * X11/DGA merge, hotkeys and grabmouse by Marcus Sundberg
+  */
+
+#include "sysconfig.h"
+#include "sysdeps.h"
+
+#include <unistd.h>
+#include <signal.h>
+
+#include <SDL/SDL.h>
+#include <SDL/SDL_endian.h>
+
+#include "config.h"
+#include "options.h"
+#include "uae.h"
+#include "memory.h"
+#include "xwin.h"
+#include "custom.h"
+#include "drawing.h"
+#include "newcpu.h"
+#include "keyboard.h"
+#include "keybuf.h"
+#include "gui.h"
+#include "debug.h"
+#include "picasso96.h"
+
+/* Uncomment for debugging output */
+/* #define DEBUG */
+
+#ifdef __cplusplus
+static RETSIGTYPE sigbrkhandler(...)
+#else
+static RETSIGTYPE sigbrkhandler (int foo)
+#endif
+{	/* SIGINT handler: calls activate_debugger(); the signal number argument is not used */
+	activate_debugger();
+#if !defined(__unix) || defined(__NeXT__)
+	signal (SIGINT, sigbrkhandler);	/* re-arm on the signal() path (presumably because the handler may be reset after delivery) */
+#endif
+}
+
+void setup_brkhandler (void)
+{	/* Install sigbrkhandler() as the handler for SIGINT */
+#if defined(__unix) && !defined(__NeXT__)
+    struct sigaction sa;
+    sa.sa_handler = sigbrkhandler;
+    sa.sa_flags = 0;
+#ifdef SA_RESTART
+    sa.sa_flags = SA_RESTART;	/* replaces the 0 above wherever the flag is defined */
+#endif
+    sigemptyset (&sa.sa_mask);	/* no extra signals are blocked while the handler runs */
+    sigaction (SIGINT, &sa, NULL);
+#else
+    signal (SIGINT, sigbrkhandler);	/* non-unix and NeXT builds fall back to plain signal() */
+#endif
+}
+
+/* SDL output surface; assigned from SDL_SetVideoMode() in graphics_subinit() */
+static SDL_Surface *prSDLScreen = NULL;
+/* Candidate screen modes: entry i is x_size_table[i] by y_size_table[i] */
+#define MAX_SCREEN_MODES 11
+static int x_size_table[MAX_SCREEN_MODES] = { 320, 320, 320, 320, 640, 640, 640, 800, 1024, 1152, 1280 };
+static int y_size_table[MAX_SCREEN_MODES] = { 200, 240, 256, 400, 350, 480, 512, 600, 768, 864, 1024 };
+/* Screen depths probed in this order by graphics_init() (0 terminated) */
+static int aScreenDepth[] = {16, 15, 12, 0};
+
+static int red_bits, green_bits, blue_bits;	/* width of each colour channel, set in init_colors() */
+static int red_shift, green_shift, blue_shift;	/* bit position of each colour channel, set in init_colors() */
+
+static int screen_is_picasso;	/* non-zero while the Picasso96 display owns the surface */
+static char picasso_invalid_lines[1201];	/* per-row dirty flags written by DX_Invalidate() */
+static int picasso_has_invalid_lines;
+static int picasso_invalid_start, picasso_invalid_stop;	/* first and last dirty row since the last flush */
+static int picasso_maxw = 0, picasso_maxh = 0;	/* largest mode offered by DX_FillResolutions() */
+
+static int bitdepth, bit_unit;	/* depth from SDL_VideoModeOK(); bits per pixel of the surface */
+
+static int current_width, current_height;	/* size passed to SDL_SetVideoMode() */
+static SDL_Color arSDLColors[256];	/* palette entries collected by get_color() */
+static int ncolors = 0;	/* number of entries used in arSDLColors */
+
+/* Keyboard and mouse */
+static int keystate[256];	/* indexed by Amiga key code; 1 while the key is held */
+
+static void handle_mousegrab(void);
+static void handle_inhibit(void);
+static void framerate_up(void);
+static void framerate_down(void);
+static void togglefullscreen(void)
+{	/* Hotkey handler (F12+s in arHotKeys): ask SDL to toggle fullscreen on the output surface */
+	SDL_WM_ToggleFullScreen(prSDLScreen);
+}
+
+static void handle_interpol (void);
+
+struct SDLHotKey
+{	/* One two-key hotkey combination and the action it triggers */
+	SDLKey aHotKeys[2];	/* [0] is the key held first, [1] the key that fires the action */
+	void (*pfHandler)(void);	/* action; NULL marks the end of the table */
+	long aPressedKeys[2];	/* 1 while the matching aHotKeys entry is held down */
+};
+
+
+static struct SDLHotKey arHotKeys[] =
+{   /* All combinations use F12 as the first key; scanned by handle_events() */
+    {{ SDLK_F12, SDLK_s}, togglefullscreen, {0, 0} },
+    {{ SDLK_F12, SDLK_q}, uae_quit, {0, 0} },
+    {{ SDLK_F12, SDLK_m}, togglemouse, {0, 0} },
+    {{ SDLK_F12, SDLK_g}, handle_mousegrab, {0, 0} },
+    {{ SDLK_F12, SDLK_i}, handle_inhibit, {0, 0} },
+    {{ SDLK_F12, SDLK_p}, handle_interpol, {0, 0} },
+    {{ SDLK_F12, SDLK_KP_PLUS}, framerate_up, {0, 0} },
+    {{ SDLK_F12, SDLK_KP_MINUS}, framerate_down, {0, 0} },
+    {{ 0, 0 }, NULL, {0, 0} }  /* List must be terminated: the scan stops at pfHandler == NULL */
+};
+
+void flush_line (int y)
+{
+    /* Not implemented for SDL output: y is ignored and nothing is drawn */
+#ifdef DEBUG
+    fprintf(stderr, "Function: flush_line\n");
+#endif
+}
+
+void flush_block (int ystart, int ystop)
+{	/* Push rows ystart..ystop (inclusive) of the surface, full width, to the display */
+#ifdef DEBUG
+    fprintf(stderr, "Function: flush_block %d %d\n", ystart, ystop);
+#endif
+    SDL_UnlockSurface (prSDLScreen);	/* NOTE(review): assumes the caller holds the lock (see lockscr) - confirm */
+    SDL_UpdateRect(prSDLScreen, 0, ystart, current_width, ystop-ystart+1);
+    SDL_LockSurface (prSDLScreen);	/* take the lock back before returning */
+}
+
+void flush_screen (int ystart, int ystop)
+{	/* Currently a no-op: both arguments are ignored and the full update below is compiled out */
+#ifdef DEBUG
+    fprintf(stderr, "Function: flush_screen\n");
+#endif
+
+#if 0
+    SDL_UpdateRect(prSDLScreen, 0, 0, current_width, current_height);
+#endif
+}
+
+STATIC_INLINE int bitsInMask (unsigned long mask)
+{
+	/* Returns how many bits are set in mask */
+	int count;
+
+	for (count = 0; mask != 0; mask >>= 1)
+	{
+		count += (int) (mask & 1);
+	}
+	return count;
+}
+
+STATIC_INLINE int maskShift (unsigned long mask)
+{
+	/* Returns the bit index of the lowest set bit in mask, or 0 for an empty mask */
+	int n = 0;
+	while (mask != 0 && !(mask & 1))	/* the mask != 0 test stops an empty mask from looping forever */
+	{
+		n++;
+		mask >>= 1;
+	}
+	return n;
+}
+
+static int get_color (int r, int g, int b, xcolnr *cnp)
+{	/* Colour callback handed to alloc_colors256(): stores the pixel value in *cnp; 1 = ok, 0 = table full */
+#ifdef DEBUG
+	fprintf(stderr, "Function: get_color\n");
+#endif
+	if (ncolors >= (int) (sizeof arSDLColors / sizeof arSDLColors[0]))
+		return 0;	/* refuse rather than write past the end of arSDLColors */
+	*cnp = SDL_MapRGB(prSDLScreen->format, r, g, b);
+	arSDLColors[ncolors].r = r;	/* remember the entry for the SDL_SetColors() call in init_colors() */
+	arSDLColors[ncolors].g = g;
+	arSDLColors[ncolors].b = b;
+	ncolors++;
+	return 1;
+}
+
+static int init_colors (void)
+{	/* Build the xcolors[] table for the current surface format; always returns 1 */
+	int i;
+
+#ifdef DEBUG
+	fprintf(stderr, "Function: init_colors\n");
+#endif
+	ncolors = 0;	/* start a fresh palette so repeated initialisation cannot overrun arSDLColors */
+	if (bitdepth > 8)
+	{
+		/* Truecolor: derive channel widths and positions from the surface masks */
+		red_bits = bitsInMask(prSDLScreen->format->Rmask);
+		green_bits = bitsInMask(prSDLScreen->format->Gmask);
+		blue_bits = bitsInMask(prSDLScreen->format->Bmask);
+		red_shift = maskShift(prSDLScreen->format->Rmask);
+		green_shift = maskShift(prSDLScreen->format->Gmask);
+		blue_shift = maskShift(prSDLScreen->format->Bmask);
+		alloc_colors64k (red_bits, green_bits, blue_bits, red_shift, green_shift, blue_shift);
+	}
+	else
+	{
+		alloc_colors256 (get_color);	/* fills arSDLColors through get_color() */
+		SDL_SetColors(prSDLScreen, arSDLColors, 0, 256);
+	}
+
+	switch (gfxvidinfo.pixbytes)
+	{
+	case 2:
+		for (i = 0; i < 4096; i++)
+			xcolors[i] = xcolors[i] * 0x00010001;	/* replicate the 16-bit value into both halves */
+		gfxvidinfo.can_double = 1;
+		break;
+	case 1:
+		for (i = 0; i < 4096; i++)
+			xcolors[i] = xcolors[i] * 0x01010101;	/* replicate the 8-bit value into all four bytes */
+		gfxvidinfo.can_double = 1;
+		break;
+	default:
+		gfxvidinfo.can_double = 0;
+		break;
+	}
+	if (SDL_BYTEORDER == SDL_BIG_ENDIAN)	/* NOTE(review): the swap results below are discarded, so this block changes nothing - confirm intent */
+	{
+		switch (gfxvidinfo.pixbytes)
+		{
+		case 4:
+			for(i = 0; i < 4096; i++)
+				SDL_Swap32(xcolors[i]);
+			break;
+		case 2:
+			for (i = 0; i < 4096; i++)
+				SDL_Swap16(xcolors[i]);
+			break;
+		}
+	}
+	return 1;
+}
+
+int graphics_setup (void)
+{   /* Initialise the SDL video subsystem; returns 1 on success and 0 on failure */
+#ifdef DEBUG
+    fprintf(stderr, "Function: graphics_setup\n");
+#endif
+
+    /* Initialize the SDL library */
+    if ( SDL_Init(SDL_INIT_VIDEO) < 0 )
+    {
+        fprintf(stderr, "Unable to init SDL: %s\n", SDL_GetError());
+        return 0;
+    }
+
+    return 1;
+}
+
+
+static void graphics_subinit (void)
+{	/* Open the SDL surface at current_width x current_height; prSDLScreen is NULL afterwards on failure */
+	Uint32 uiSDLVidModFlags;
+
+#ifdef DEBUG
+	fprintf(stderr, "Function: graphics_subinit\n");
+#endif
+
+	/* Open SDL Window in current mode */
+	uiSDLVidModFlags = SDL_SWSURFACE;
+	if (bitdepth == 8)
+	{
+		uiSDLVidModFlags |= SDL_HWPALETTE;
+	}
+	if (currprefs.gfx_afullscreen || currprefs.gfx_pfullscreen)
+	{
+		uiSDLVidModFlags |= SDL_FULLSCREEN;
+	}
+	fprintf(stderr, "Resolution: %d x %d x %d\n", current_width, current_height, bitdepth);
+
+	prSDLScreen = SDL_SetVideoMode(current_width, current_height, bitdepth, uiSDLVidModFlags);
+	if (prSDLScreen == NULL)
+	{
+		fprintf(stderr, "Unable to set video mode: %s\n", SDL_GetError());
+		return;
+	}
+	else
+	{
+#ifdef DEBUG
+		fprintf(stderr, "Bytes per Pixel: %d\n", prSDLScreen->format->BytesPerPixel);
+		fprintf(stderr, "Bytes per Line: %d\n", prSDLScreen->pitch);
+#endif
+		SDL_LockSurface(prSDLScreen);
+		memset(prSDLScreen->pixels, 0, prSDLScreen->pitch * prSDLScreen->h);	/* rows are pitch bytes apart, which may exceed width * bytes per pixel */
+		SDL_UnlockSurface(prSDLScreen);
+		SDL_UpdateRect(prSDLScreen, 0, 0, current_width, current_height);
+		/* Set UAE window title and icon name */
+		SDL_WM_SetCaption("UAE","UAE");
+		/* Hide mouse cursor */
+		SDL_ShowCursor(SDL_DISABLE);
+		/* Initialize structure for Amiga video modes */
+		gfxvidinfo.bufmem = prSDLScreen->pixels;
+		gfxvidinfo.linemem = 0;
+		gfxvidinfo.emergmem = 0;
+		gfxvidinfo.pixbytes = prSDLScreen->format->BytesPerPixel;
+		bit_unit = prSDLScreen->format->BytesPerPixel * 8;
+		gfxvidinfo.rowbytes = prSDLScreen->pitch;
+		gfxvidinfo.maxblocklines = 100;
+		gfxvidinfo.can_double = 0;
+		/* Initialize structure for Picasso96 video modes */
+		picasso_vidinfo.rowbytes = prSDLScreen->pitch;	/* gfx_lock_picasso() hands out this surface, so use its real row stride */
+		picasso_vidinfo.extra_mem = 1;
+		picasso_vidinfo.depth = bitdepth;
+		picasso_has_invalid_lines = 0;
+		picasso_invalid_start = picasso_vidinfo.height + 1;
+		picasso_invalid_stop = -1;
+		memset (picasso_invalid_lines, 0, sizeof picasso_invalid_lines);
+	}
+	lastmx = lastmy = 0;
+	newmousecounters = 0;
+}
+
+
+int graphics_init (void)
+{	/* Choose a video mode, open the surface and set up colours; returns 1 on success, 0 on failure */
+	int i,j;
+
+#ifdef DEBUG
+	fprintf(stderr, "Function: graphics_init\n");
+#endif
+
+	if (currprefs.color_mode > 5)
+		fprintf (stderr, "Bad color mode selected. Using default.\n"), currprefs.color_mode = 0;
+
+	screen_is_picasso = 0;
+
+	fixup_prefs_dimensions (&currprefs);
+
+	/* Start from the size requested in the preferences */
+	gfxvidinfo.width = currprefs.gfx_width;
+	gfxvidinfo.height = currprefs.gfx_height;
+	current_width = currprefs.gfx_width;
+	current_height = currprefs.gfx_height;
+
+	/* Find a SDL video mode with exact width and height */
+	for (i = 0; aScreenDepth[i] != 0; i++)
+	{
+		bitdepth = SDL_VideoModeOK(current_width, current_height, aScreenDepth[i], SDL_SWSURFACE);
+		if (bitdepth)
+		{
+			#ifdef DEBUG
+			fprintf(stderr, "Bit depth: %d\n", bitdepth);
+			#endif
+			break;
+		}
+		else
+		{
+			fprintf(stderr, "Video mode %dx%d@%d not available\n", current_width, current_height, aScreenDepth[i]);
+		}
+	}
+	if (bitdepth == 0)
+	{
+		/* Find a SDL video mode from standard resolutions */
+		for (j = 0; j < MAX_SCREEN_MODES && !bitdepth; j++)
+		{
+			if (x_size_table[j] < current_width || y_size_table[j] < current_height)
+				continue;	/* too small to hold the requested display */
+			for (i = 0; aScreenDepth[i] != 0 && !bitdepth; i++)
+			{
+				bitdepth = SDL_VideoModeOK(x_size_table[j], y_size_table[j], aScreenDepth[i], SDL_SWSURFACE);
+				if (bitdepth)
+				{
+					#ifdef DEBUG
+					fprintf(stderr, "Bit depth: %d\n", bitdepth);
+					#endif
+					gfxvidinfo.width = current_width = x_size_table[j];
+					gfxvidinfo.height = current_height = y_size_table[j];
+					break;
+				}
+				else
+				{
+					fprintf(stderr, "Video mode %dx%d@%d not available\n", x_size_table[j], y_size_table[j], aScreenDepth[i]);	/* report the mode that was actually probed */
+				}
+			}
+		}
+		if (bitdepth == 0)
+		{
+			fprintf(stderr, "No video mode found!\n");
+			return 0;
+		}
+	}
+
+	graphics_subinit ();
+	if (prSDLScreen == NULL)	/* surface could not be opened; init_colors() would dereference it */
+		return 0;
+    if (!init_colors ())
+		return 0;
+
+    buttonstate[0] = buttonstate[1] = buttonstate[2] = 0;
+    for (i = 0; i < 256; i++)
+	keystate[i] = 0;
+
+    return 1;
+}
+
+static void graphics_subshutdown (void)
+{   /* Release the current output surface; prSDLScreen itself is left unchanged */
+#ifdef DEBUG
+    fprintf(stderr, "Function: graphics_subshutdown\n");
+#endif
+    /* NOTE(review): SDL documents the SDL_SetVideoMode() surface as owned by SDL - confirm this free is wanted */
+    SDL_FreeSurface(prSDLScreen);
+}
+
+void graphics_leave (void)
+{   /* Shut the graphics backend down: free the surface, stop SDL video, then call dumpcustom() */
+#ifdef DEBUG
+    fprintf(stderr, "Function: graphics_leave\n");
+#endif
+
+    graphics_subshutdown ();
+
+	SDL_VideoQuit();
+
+    dumpcustom ();
+}
+
+/* Decode KeySyms that are common between different keyboard languages into
+ * Amiga AK_* codes. Returns -1 for any key this table does not list. */
+static int kc_decode (SDL_keysym *prKeySym)
+{
+    switch (prKeySym->sym)
+    {
+    case SDLK_b: return AK_B;
+    case SDLK_c: return AK_C;
+    case SDLK_d: return AK_D;
+    case SDLK_e: return AK_E;
+    case SDLK_f: return AK_F;
+    case SDLK_g: return AK_G;
+    case SDLK_h: return AK_H;
+    case SDLK_i: return AK_I;
+    case SDLK_j: return AK_J;
+    case SDLK_k: return AK_K;
+    case SDLK_l: return AK_L;
+    case SDLK_n: return AK_N;
+    case SDLK_o: return AK_O;
+    case SDLK_p: return AK_P;
+    case SDLK_r: return AK_R;
+    case SDLK_s: return AK_S;
+    case SDLK_t: return AK_T;
+    case SDLK_u: return AK_U;
+    case SDLK_v: return AK_V;
+    case SDLK_x: return AK_X;
+
+    case SDLK_0: return AK_0;
+    case SDLK_1: return AK_1;
+    case SDLK_2: return AK_2;
+    case SDLK_3: return AK_3;
+    case SDLK_4: return AK_4;
+    case SDLK_5: return AK_5;
+    case SDLK_6: return AK_6;
+    case SDLK_7: return AK_7;
+    case SDLK_8: return AK_8;
+    case SDLK_9: return AK_9;
+
+    case SDLK_KP0: return AK_NP0;
+    case SDLK_KP1: return AK_NP1;
+    case SDLK_KP2: return AK_NP2;
+    case SDLK_KP3: return AK_NP3;
+    case SDLK_KP4: return AK_NP4;
+    case SDLK_KP5: return AK_NP5;
+    case SDLK_KP6: return AK_NP6;
+    case SDLK_KP7: return AK_NP7;
+    case SDLK_KP8: return AK_NP8;
+    case SDLK_KP9: return AK_NP9;
+    case SDLK_KP_DIVIDE: return AK_NPDIV;
+    case SDLK_KP_MULTIPLY: return AK_NPMUL;
+    case SDLK_KP_MINUS: return AK_NPSUB;
+    case SDLK_KP_PLUS: return AK_NPADD;
+    case SDLK_KP_PERIOD: return AK_NPDEL;
+    case SDLK_KP_ENTER: return AK_ENT;
+
+    case SDLK_F1: return AK_F1;
+    case SDLK_F2: return AK_F2;
+    case SDLK_F3: return AK_F3;
+    case SDLK_F4: return AK_F4;
+    case SDLK_F5: return AK_F5;
+    case SDLK_F6: return AK_F6;
+    case SDLK_F7: return AK_F7;
+    case SDLK_F8: return AK_F8;
+    case SDLK_F9: return AK_F9;
+    case SDLK_F10: return AK_F10;
+
+    case SDLK_BACKSPACE: return AK_BS;
+    case SDLK_DELETE: return AK_DEL;
+    case SDLK_LCTRL: return AK_CTRL;
+    case SDLK_RCTRL: return AK_RCTRL;
+    case SDLK_TAB: return AK_TAB;
+    case SDLK_LALT: return AK_LALT;
+    case SDLK_RALT: return AK_RALT;
+    case SDLK_RMETA: return AK_RAMI;
+    case SDLK_LMETA: return AK_LAMI;
+    case SDLK_RETURN: return AK_RET;
+    case SDLK_SPACE: return AK_SPC;
+    case SDLK_LSHIFT: return AK_LSH;
+    case SDLK_RSHIFT: return AK_RSH;
+    case SDLK_ESCAPE: return AK_ESC;
+
+    case SDLK_INSERT: return AK_HELP;
+    case SDLK_HOME: return AK_NPLPAREN;
+    case SDLK_END: return AK_NPRPAREN;
+    case SDLK_CAPSLOCK: return AK_CAPSLOCK;
+
+    case SDLK_UP: return AK_UP;
+    case SDLK_DOWN: return AK_DN;
+    case SDLK_LEFT: return AK_LF;
+    case SDLK_RIGHT: return AK_RT;
+
+    case SDLK_PAGEUP: return AK_RAMI;          /* PgUp mapped to right amiga */
+    case SDLK_PAGEDOWN: return AK_LAMI;        /* PgDn mapped to left amiga */
+
+    default: return -1;
+    }
+}
+
+static int decode_fr (SDL_keysym *prKeySym)
+{
+    switch(prKeySym->sym)
+    {
+	/* FR specific: tried by keycode2amiga() after kc_decode() returned -1; -1 here means unmapped */
+    case SDLK_a: return AK_Q;
+    case SDLK_m: return AK_SEMICOLON;
+    case SDLK_q: return AK_A;
+    case SDLK_y: return AK_Y;
+    case SDLK_w: return AK_Z;
+    case SDLK_z: return AK_W;
+    case SDLK_LEFTBRACKET: return AK_LBRACKET;
+    case SDLK_RIGHTBRACKET: return AK_RBRACKET;
+    case SDLK_COMMA: return AK_M;
+    case SDLK_LESS: case SDLK_GREATER: return AK_LTGT;
+    case SDLK_PERIOD: case SDLK_SEMICOLON: return AK_COMMA;
+    case SDLK_RIGHTPAREN: return AK_MINUS;
+    case SDLK_EQUALS: return AK_SLASH;
+    case SDLK_HASH: return AK_NUMBERSIGN;
+    case SDLK_SLASH: return AK_PERIOD;
+    case SDLK_MINUS: return AK_EQUAL;
+    case SDLK_BACKSLASH: return AK_BACKSLASH;
+    default: return -1;
+    }
+}
+
+static int decode_us (SDL_keysym *prKeySym)
+{
+    switch(prKeySym->sym)
+    {
+	/* US specific: tried by keycode2amiga() after kc_decode() returned -1; -1 here means unmapped */
+    case SDLK_a: return AK_A;
+    case SDLK_m: return AK_M;
+    case SDLK_q: return AK_Q;
+    case SDLK_y: return AK_Y;
+    case SDLK_w: return AK_W;
+    case SDLK_z: return AK_Z;
+    case SDLK_LEFTBRACKET: return AK_LBRACKET;
+    case SDLK_RIGHTBRACKET: return AK_RBRACKET;
+    case SDLK_COMMA: return AK_COMMA;
+    case SDLK_PERIOD: return AK_PERIOD;
+    case SDLK_SLASH: return AK_SLASH;
+    case SDLK_SEMICOLON: return AK_SEMICOLON;
+    case SDLK_MINUS: return AK_MINUS;
+    case SDLK_EQUALS: return AK_EQUAL;
+	/* grave accent maps to the Amiga backquote key, apostrophe to the quote key */
+    case SDLK_BACKQUOTE: return AK_BACKQUOTE;
+    case SDLK_QUOTE: return AK_QUOTE;
+    case SDLK_BACKSLASH: return AK_BACKSLASH;
+    default: return -1;
+    }
+}
+
+static int decode_de (SDL_keysym *prKeySym)
+{
+    switch(prKeySym->sym)
+    {
+	/* DE specific: tried by keycode2amiga() after kc_decode() returned -1; -1 here means unmapped */
+    case SDLK_a: return AK_A;
+    case SDLK_m: return AK_M;
+    case SDLK_q: return AK_Q;
+    case SDLK_w: return AK_W;
+    case SDLK_y: return AK_Z;
+    case SDLK_z: return AK_Y;
+	/* German umlaut oe */
+    case SDLK_WORLD_86: return AK_SEMICOLON;
+	/* German umlaut ae */
+    case SDLK_WORLD_68: return AK_QUOTE;
+	/* German umlaut ue */
+    case SDLK_WORLD_92: return AK_LBRACKET;
+    case SDLK_PLUS: case SDLK_ASTERISK: return AK_RBRACKET;
+    case SDLK_COMMA: return AK_COMMA;
+    case SDLK_PERIOD: return AK_PERIOD;
+    case SDLK_LESS: case SDLK_GREATER: return AK_LTGT;
+    case SDLK_HASH: return AK_NUMBERSIGN;
+	/* German sharp s */
+    case SDLK_WORLD_63: return AK_MINUS;
+    case SDLK_QUOTE: return AK_EQUAL;
+    case SDLK_CARET: return AK_BACKQUOTE;
+    case SDLK_MINUS: return AK_SLASH;
+    default: return -1;
+    }
+}
+
+static int decode_se (SDL_keysym *prKeySym)
+{
+    switch(prKeySym->sym)
+    {
+	/* SE specific: tried by keycode2amiga() after kc_decode() returned -1; -1 here means unmapped */
+    case SDLK_a: return AK_A;
+    case SDLK_m: return AK_M;
+    case SDLK_q: return AK_Q;
+    case SDLK_w: return AK_W;
+    case SDLK_y: return AK_Y;
+    case SDLK_z: return AK_Z;
+    case SDLK_WORLD_86: return AK_SEMICOLON;
+    case SDLK_WORLD_68: return AK_QUOTE;
+    case SDLK_WORLD_69: return AK_LBRACKET;
+    case SDLK_COMMA: return AK_COMMA;
+    case SDLK_PERIOD: return AK_PERIOD;
+    case SDLK_MINUS: return AK_SLASH;
+    case SDLK_LESS: case SDLK_GREATER: return AK_LTGT;
+    case SDLK_PLUS: case SDLK_QUESTION: return AK_EQUAL;
+    case SDLK_AT: case SDLK_WORLD_29: return AK_BACKQUOTE;
+    case SDLK_CARET: return AK_RBRACKET;
+    case SDLK_BACKSLASH: return AK_MINUS;
+    case SDLK_HASH: return AK_NUMBERSIGN;
+    default: return -1;
+    }
+}
+
+static int decode_it (SDL_keysym *prKeySym)
+{
+    switch(prKeySym->sym)
+    {
+	/* IT specific: tried by keycode2amiga() after kc_decode() returned -1; -1 here means unmapped */
+    case SDLK_a: return AK_A;
+    case SDLK_m: return AK_M;
+    case SDLK_q: return AK_Q;
+    case SDLK_w: return AK_W;
+    case SDLK_y: return AK_Y;
+    case SDLK_z: return AK_Z;
+    case SDLK_WORLD_82: return AK_SEMICOLON;
+    case SDLK_WORLD_64: return AK_QUOTE;
+    case SDLK_WORLD_72: return AK_LBRACKET;
+    case SDLK_PLUS: case SDLK_ASTERISK: return AK_RBRACKET;
+    case SDLK_COMMA: return AK_COMMA;
+    case SDLK_PERIOD: return AK_PERIOD;
+    case SDLK_LESS: case SDLK_GREATER: return AK_LTGT;
+    case SDLK_BACKSLASH: return AK_BACKQUOTE;
+    case SDLK_QUOTE: return AK_MINUS;
+    case SDLK_WORLD_76: return AK_EQUAL;
+    case SDLK_MINUS: return AK_SLASH;
+    case SDLK_HASH: return AK_NUMBERSIGN;
+    default: return -1;
+    }
+}
+
+static int decode_es (SDL_keysym *prKeySym)
+{
+    switch(prKeySym->sym)
+    {
+	/* ES specific: tried by keycode2amiga() after kc_decode() returned -1; -1 here means unmapped */
+    case SDLK_a: return AK_A;
+    case SDLK_m: return AK_M;
+    case SDLK_q: return AK_Q;
+    case SDLK_w: return AK_W;
+    case SDLK_y: return AK_Y;
+    case SDLK_z: return AK_Z;
+    case SDLK_WORLD_81: return AK_SEMICOLON;
+    case SDLK_PLUS: case SDLK_ASTERISK: return AK_RBRACKET;
+    case SDLK_COMMA: return AK_COMMA;
+    case SDLK_PERIOD: return AK_PERIOD;
+    case SDLK_LESS: case SDLK_GREATER: return AK_LTGT;
+    case SDLK_BACKSLASH: return AK_BACKQUOTE;
+    case SDLK_QUOTE: return AK_MINUS;
+    case SDLK_WORLD_76: return AK_EQUAL;
+    case SDLK_MINUS: return AK_SLASH;
+    case SDLK_HASH: return AK_NUMBERSIGN;
+    default: return -1;
+    }
+}
+
+static int keycode2amiga(SDL_keysym *prKeySym)
+{
+    /*
+     * Translate an SDL keysym into an Amiga key code, or -1 if unmapped.
+     * The language-independent table is consulted first.
+     */
+    int iCommon = kc_decode(prKeySym);
+
+    if (iCommon != -1)
+        return iCommon;
+
+    switch (currprefs.keyboard_lang)
+    {
+    case KBD_LANG_FR: return decode_fr(prKeySym);
+    case KBD_LANG_US: return decode_us(prKeySym);
+    case KBD_LANG_DE: return decode_de(prKeySym);
+    case KBD_LANG_SE: return decode_se (prKeySym);
+    case KBD_LANG_IT: return decode_it (prKeySym);
+    case KBD_LANG_ES: return decode_es (prKeySym);
+    default:
+        break;
+    }
+    /* No table for the configured keyboard language */
+    return -1;
+}
+
+static int refresh_necessary = 0;
+
+void handle_events (void)
+{   /* Drain the SDL event queue: hotkeys, Amiga key and mouse state, then flush dirty Picasso rows */
+    SDL_Event rEvent;
+    int iAmigaKeyCode;
+    int i, j;
+    int iIsHotKey = 0;
+#ifdef DEBUG
+    fprintf(stderr, "Function: handle_events\n");
+#endif
+
+    /* Handle GUI events */
+    gui_handle_events ();
+
+    while (SDL_PollEvent(&rEvent))
+    {
+	iIsHotKey = 0;	/* decided per event, so one hotkey cannot swallow later keys in the same batch */
+	switch (rEvent.type)
+	{
+	case SDL_QUIT:
+#ifdef DEBUG
+	    fprintf(stderr, "Event: quit\n");
+#endif
+	    uae_quit();
+	    break;
+        case SDL_KEYDOWN:
+#ifdef DEBUG
+	    fprintf(stderr, "Event: key down\n");
+#endif
+	    i = 0;	/* Check for hotkey sequence */
+	    while (arHotKeys[i].pfHandler != NULL)
+	    {
+		if (rEvent.key.keysym.sym == arHotKeys[i].aHotKeys[0])
+		{
+		    arHotKeys[i].aPressedKeys[0] = 1;
+		    iIsHotKey = 1;
+		}
+		if (arHotKeys[i].aPressedKeys[0] == 1 &&
+		    rEvent.key.keysym.sym == arHotKeys[i].aHotKeys[1])
+		{
+		    arHotKeys[i].aPressedKeys[1] = 1;
+		    arHotKeys[i].pfHandler();
+		    iIsHotKey = 1;
+		}
+		i++;
+	    }
+	    if (iIsHotKey == 0)
+	    {
+		iAmigaKeyCode = keycode2amiga(&(rEvent.key.keysym));	/* No hotkey sequence: ordinary key */
+		if (iAmigaKeyCode >= 0)
+		{
+		    if (!keystate[iAmigaKeyCode])
+		    {
+			keystate[iAmigaKeyCode] = 1;
+			record_key(iAmigaKeyCode << 1);
+		    }
+		}
+	    }
+	    break;
+	case SDL_KEYUP:
+#ifdef DEBUG
+	    fprintf(stderr, "Event: key up\n");
+#endif
+	    i = 0;	/* Check for hotkey sequence */
+	    while (arHotKeys[i].pfHandler != NULL)
+	    {
+		for (j = 0; j < 2; j++)
+		{
+		    if (rEvent.key.keysym.sym == arHotKeys[i].aHotKeys[j] &&
+			arHotKeys[i].aPressedKeys[j] == 1)
+		    {
+			arHotKeys[i].aPressedKeys[j] = 0;
+			iIsHotKey = 1;
+		    }
+		}
+		i++;
+	    }
+	    if (iIsHotKey == 0)
+	    {
+		iAmigaKeyCode = keycode2amiga(&(rEvent.key.keysym));
+		if (iAmigaKeyCode >= 0)
+		{
+		    keystate[iAmigaKeyCode] = 0;
+		    record_key((iAmigaKeyCode << 1) | 1);
+		}
+	    }
+	    break;
+	case SDL_MOUSEBUTTONDOWN:
+#ifdef DEBUG
+	    fprintf(stderr, "Event: mouse button down\n");
+#endif
+	    if (rEvent.button.button >= 1 && rEvent.button.button <= 3)	/* only three button slots are used; SDL reports the wheel as 4 and 5 */
+		buttonstate[rEvent.button.button-1] = 1;
+	    break;
+	case SDL_MOUSEBUTTONUP:
+#ifdef DEBUG
+	    fprintf(stderr, "Event: mouse button up\n");
+#endif
+	    if (rEvent.button.button >= 1 && rEvent.button.button <= 3)	/* same range check as for button down */
+		buttonstate[rEvent.button.button-1] = 0;
+	    break;
+	case SDL_MOUSEMOTION:
+#ifdef DEBUG
+	    fprintf(stderr, "Event: mouse motion\n");
+#endif
+	    newmousecounters = 1;
+	    lastmx += rEvent.motion.xrel;
+	    lastmy += rEvent.motion.yrel;
+	    break;
+	}
+    }
+#if defined PICASSO96
+    if (screen_is_picasso && refresh_necessary)
+    {
+	SDL_UpdateRect(prSDLScreen, 0, 0, picasso_vidinfo.width, picasso_vidinfo.height);
+	refresh_necessary = 0;
+	memset (picasso_invalid_lines, 0, sizeof picasso_invalid_lines);
+    }
+    else if (screen_is_picasso && picasso_has_invalid_lines)
+    {
+	int i;
+	int strt = -1;	/* first row of the dirty run being collected, -1 when none is open */
+	picasso_invalid_lines[picasso_vidinfo.height] = 0;	/* guarantees a clean row that closes the last run */
+	for (i = picasso_invalid_start; i < picasso_invalid_stop + 2; i++)
+	{
+	    if (picasso_invalid_lines[i])
+	    {
+		picasso_invalid_lines[i] = 0;
+		if (strt != -1)
+		    continue;
+		strt = i;
+	    }
+	    else
+	    {
+		if (strt == -1)
+		    continue;
+		SDL_UpdateRect(prSDLScreen, 0, strt, picasso_vidinfo.width, i-strt);
+		strt = -1;
+	    }
+	}
+	if (strt != -1)
+	    abort ();
+    }
+    picasso_has_invalid_lines = 0;
+    picasso_invalid_start = picasso_vidinfo.height + 1;
+    picasso_invalid_stop = -1;
+#endif
+
+    /* Handle UAE reset */
+    if ((keystate[AK_CTRL] || keystate[AK_RCTRL]) && keystate[AK_LAMI] && keystate[AK_RAMI])
+	uae_reset ();
+}
+
+int check_prefs_changed_gfx (void)
+{
+    /* Apply pending graphics preference changes by reopening the surface; returns 0 on every path */
+    if (changed_prefs.gfx_width != currprefs.gfx_width
+	|| changed_prefs.gfx_height != currprefs.gfx_height)
+    {
+	fixup_prefs_dimensions (&changed_prefs);
+    }
+
+    if (changed_prefs.gfx_width == currprefs.gfx_width
+	&& changed_prefs.gfx_height == currprefs.gfx_height
+	&& changed_prefs.gfx_lores == currprefs.gfx_lores
+	&& changed_prefs.gfx_linedbl == currprefs.gfx_linedbl
+	&& changed_prefs.gfx_correct_aspect == currprefs.gfx_correct_aspect
+	&& changed_prefs.gfx_xcenter == currprefs.gfx_xcenter
+	&& changed_prefs.gfx_ycenter == currprefs.gfx_ycenter
+	&& changed_prefs.gfx_afullscreen == currprefs.gfx_afullscreen
+	&& changed_prefs.gfx_pfullscreen == currprefs.gfx_pfullscreen)
+    {
+	return 0;	/* nothing relevant changed */
+    }
+#ifdef DEBUG
+    fprintf(stderr, "Function: check_prefs_changed_gfx\n");
+#endif
+    graphics_subshutdown ();
+    currprefs.gfx_width = changed_prefs.gfx_width;
+    currprefs.gfx_height = changed_prefs.gfx_height;
+    currprefs.gfx_lores = changed_prefs.gfx_lores;
+    currprefs.gfx_linedbl = changed_prefs.gfx_linedbl;
+    currprefs.gfx_correct_aspect = changed_prefs.gfx_correct_aspect;
+    currprefs.gfx_xcenter = changed_prefs.gfx_xcenter;
+    currprefs.gfx_ycenter = changed_prefs.gfx_ycenter;
+    currprefs.gfx_afullscreen = changed_prefs.gfx_afullscreen;
+    currprefs.gfx_pfullscreen = changed_prefs.gfx_pfullscreen;
+
+    gui_update_gfx ();
+    /* NOTE(review): graphics_subinit() opens current_width x current_height, which are not updated here - confirm */
+    graphics_subinit ();
+
+    /*    if (! inwindow)
+	  XWarpPointer (display, None, mywin, 0, 0, 0, 0,
+	  current_width / 2, current_height / 2);
+    */
+    notice_screen_contents_lost ();
+    init_row_map ();
+    if (screen_is_picasso)
+	picasso_enablescreen (1);
+    return 0;
+}
+
+int debuggable (void)
+{   /* Always returns 1 for this backend */
+    return 1;
+}
+
+int needmousehack (void)
+{   /* Always returns 1 for this backend */
+    return 1;
+}
+
+void LED (int on)
+{   /* No-op in this backend: the X11 keyboard LED code below is compiled out and `on` is ignored */
+#if 0 /* Maybe that is responsible for the joystick emulation problems on SunOS? */
+    static int last_on = -1;
+    XKeyboardControl control;
+
+    if (last_on == on)
+	return;
+    last_on = on;
+    control.led = 1; /* implementation defined */
+    control.led_mode = on ? LedModeOn : LedModeOff;
+    XChangeKeyboardControl(display, KBLed | KBLedMode, &control);
+#endif
+}
+
+#ifdef PICASSO96
+
+void DX_Invalidate (int first, int last)
+{   /* Mark Picasso rows first..last (inclusive) as dirty so handle_events() redraws them */
+#ifdef DEBUG
+    fprintf(stderr, "Function: DX_Invalidate %i - %i\n", first, last);
+#endif
+    /* Clamp to the flag array; handle_events() also reads the entry one past `last` */
+    if (first < 0)
+	first = 0;
+    if (last > (int) sizeof (picasso_invalid_lines) - 2)
+	last = (int) sizeof (picasso_invalid_lines) - 2;
+    if (first > last)
+	return;
+
+    picasso_has_invalid_lines = 1;
+    if (first < picasso_invalid_start)
+	picasso_invalid_start = first;
+    if (last > picasso_invalid_stop)
+	picasso_invalid_stop = last;
+    while (first <= last)
+	picasso_invalid_lines[first++] = 1;
+}
+
+int DX_BitsPerCannon (void)
+{   /* Always returns 8 */
+    return 8;
+}
+
+static int palette_update_start=256;
+static int palette_update_end=0;
+
+void DX_SetPalette (int start, int count)
+{   /* Refresh `count` colour lookup entries starting at `start` from picasso96_state.CLUT */
+#ifdef DEBUG
+    fprintf(stderr, "Function: DX_SetPalette_real\n");
+#endif
+
+    if (! screen_is_picasso || picasso96_state.RGBFormat != RGBFB_CHUNKY)
+	return;	/* only relevant while a chunky Picasso screen is shown */
+
+    if (picasso_vidinfo.pixbytes != 1)
+    {
+	/* This is the case when we're emulating a 256 color display.  */
+	while (count-- > 0)
+	{
+	    int r = picasso96_state.CLUT[start].Red;
+	    int g = picasso96_state.CLUT[start].Green;
+	    int b = picasso96_state.CLUT[start].Blue;
+	    picasso_vidinfo.clut[start++] = (doMask256 (r, red_bits, red_shift)
+					     | doMask256 (g, green_bits, green_shift)
+					     | doMask256 (b, blue_bits, blue_shift));
+	}
+	return;
+    }
+    /* NOTE(review): with pixbytes == 1 the loop below only advances `start`; no palette is programmed - confirm */
+    while (count-- > 0)
+    {
+#if 0
+	XColor col = parsed_xcolors[start];
+	col.red = picasso96_state.CLUT[start].Red * 0x0101;
+	col.green = picasso96_state.CLUT[start].Green * 0x0101;
+	col.blue = picasso96_state.CLUT[start].Blue * 0x0101;
+	XStoreColor (display, cmap, &col);
+#endif
+	start++;
+    }
+}
+
+int DX_FillResolutions (uae_u16 *ppixel_format)
+{
+    int i, count = 0;
+    int w = 0;
+    int h = 0;
+    int emulate_chunky = 0;
+    /* Fills DisplayModes[], writes the pixel format mask to *ppixel_format, returns the number of modes */
+#ifdef DEBUG
+    fprintf(stderr, "Function: DX_FillResolutions\n");
+#endif
+
+    /* Find out, which is the highest resolution the SDL can offer */
+    for (i = MAX_SCREEN_MODES-1; i>=0; i--)
+    {
+	if (bitdepth == SDL_VideoModeOK(x_size_table[i], y_size_table[i], bitdepth, SDL_SWSURFACE))
+	{
+	    w = x_size_table[i];
+	    h = y_size_table[i];
+	    break;
+	}
+    }
+
+#ifdef DEBUG
+    fprintf(stderr, "Max. Picasso screen size: %d x %d\n", w, h);
+#endif
+
+    picasso_vidinfo.rgbformat = (bit_unit == 8 ? RGBFB_CHUNKY
+				 : bitdepth == 15 && bit_unit == 16 ? RGBFB_R5G5B5PC
+				 : bitdepth == 16 && bit_unit == 16 ? RGBFB_R5G6B5PC
+				 : bit_unit == 24 ? RGBFB_B8G8R8
+				 : bit_unit == 32 ? RGBFB_B8G8R8A8
+				 : RGBFB_NONE);
+
+    *ppixel_format = 1 << picasso_vidinfo.rgbformat;
+    if (bit_unit == 16 || bit_unit == 32)
+    {
+	*ppixel_format |= RGBFF_CHUNKY;	/* also advertise chunky modes on 16 and 32 bit surfaces */
+	emulate_chunky = 1;
+    }
+
+    for (i = 0; i < MAX_SCREEN_MODES && count < MAX_PICASSO_MODES; i++)
+    {
+	int j;	/* j == 0: mode at the surface depth; j == 1: extra mode with depth 1 */
+	for (j = 0; j <= emulate_chunky && count < MAX_PICASSO_MODES; j++)
+	{
+	    if (x_size_table[i] <= w && y_size_table[i] <= h)
+	    {
+		if (x_size_table[i] > picasso_maxw)
+		    picasso_maxw = x_size_table[i];
+		if (y_size_table[i] > picasso_maxh)
+		    picasso_maxh = y_size_table[i];
+		DisplayModes[count].res.width = x_size_table[i];
+		DisplayModes[count].res.height = y_size_table[i];
+		DisplayModes[count].depth = j == 1 ? 1 : bit_unit >> 3;
+		DisplayModes[count].refresh = 75;
+#ifdef DEBUG
+		fprintf(stderr, "Picasso resolution %d x %d @ %d allowed\n", DisplayModes[count].res.width, DisplayModes[count].res.height, DisplayModes[count].depth);
+#endif
+
+		count++;
+	    }
+	}
+    }
+#ifdef DEBUG
+    fprintf(stderr, "Max. Picasso screen size: %d x %d\n", picasso_maxw, picasso_maxh);
+#endif
+    return count;
+}
+
+static void set_window_for_picasso (void)
+{   /* Reopen the surface at the Picasso size recorded in picasso_vidinfo, unless it already matches */
+#ifdef DEBUG
+    fprintf(stderr, "Function: set_window_for_picasso\n");
+#endif
+
+    if (current_width == picasso_vidinfo.width && current_height == picasso_vidinfo.height)
+		return;
+
+    current_width = picasso_vidinfo.width;
+    current_height = picasso_vidinfo.height;
+    graphics_subshutdown ();
+    graphics_subinit ();
+//	XResizeWindow (display, mywin, current_width, current_height);
+}
+
+void gfx_set_picasso_modeinfo (int w, int h, int depth, int rgbfmt)
+{   /* Record the requested Picasso mode in picasso_vidinfo; rgbfmt is only used in the debug print */
+#ifdef DEBUG
+    fprintf(stderr, "Function: gfx_set_picasso_modeinfo w: %i h: %i depth: %i rgbfmt: %i\n",w, h, depth, rgbfmt);
+#endif
+
+    picasso_vidinfo.width = w;
+    picasso_vidinfo.height = h;
+    picasso_vidinfo.depth = depth;
+    picasso_vidinfo.pixbytes = bit_unit >> 3;	/* bytes per pixel of the SDL surface, not of the requested depth */
+    if (screen_is_picasso)
+	set_window_for_picasso();	/* resize right away when the Picasso screen is visible */
+}
+
+void gfx_set_picasso_baseaddr (uaecptr a)
+{   /* Intentionally empty: the address is ignored by this backend */
+}
+
+void gfx_set_picasso_state (int on)
+{
+#ifdef DEBUG
+    fprintf(stderr, "Function: gfx_set_picasso_state\n");
+#endif
+
+    /*
+     * Switch the output surface between the Amiga display (on == 0) and
+     * the Picasso96 display (on != 0).  Nothing happens when the requested
+     * state is already active; otherwise the surface is torn down and
+     * reopened at the size recorded for the new owner.
+     */
+    if (on != screen_is_picasso)
+    {
+	graphics_subshutdown ();
+	screen_is_picasso = on;
+
+	/* Picasso size comes from picasso_vidinfo, Amiga size from gfxvidinfo */
+	current_width = on ? picasso_vidinfo.width : gfxvidinfo.width;
+	current_height = on ? picasso_vidinfo.height : gfxvidinfo.height;
+
+	graphics_subinit ();
+	if (on)
+	    DX_SetPalette (0, 256);
+    }
+}
+
+uae_u8 *gfx_lock_picasso (void)
+{   /* Lock the SDL surface and return its pixel pointer */
+#ifdef DEBUG
+    fprintf(stderr, "Function: gfx_lock_picasso\n");
+#endif
+    SDL_LockSurface(prSDLScreen);	/* NOTE(review): the lock result is not checked before pixels is returned */
+    return prSDLScreen->pixels;
+}
+
+void gfx_unlock_picasso (void)
+{   /* Counterpart of gfx_lock_picasso(): unlock the SDL surface */
+#ifdef DEBUG
+    fprintf(stderr, "Function: gfx_unlock_picasso\n");
+#endif
+    SDL_UnlockSurface(prSDLScreen);
+}
+#endif
+
+int lockscr (void)
+{   /* Lock the SDL surface for drawing; returns 1 on success and 0 if SDL refused the lock */
+#ifdef DEBUG
+    fprintf(stderr, "Function: lockscr\n");
+#endif
+    /* SDL_LockSurface() returns 0 on success and -1 on failure */
+    return SDL_LockSurface(prSDLScreen) == 0;
+}
+
+void unlockscr (void)
+{   /* Counterpart of lockscr(): unlock the SDL surface */
+#ifdef DEBUG
+    fprintf(stderr, "Function: unlockscr\n");
+#endif
+    SDL_UnlockSurface(prSDLScreen);
+}
+
+static void handle_mousegrab (void)
+{   /* Hotkey handler (F12+g in arHotKeys): toggle SDL input grabbing */
+    if (SDL_WM_GrabInput(SDL_GRAB_QUERY) == SDL_GRAB_OFF)
+    {
+	SDL_WM_GrabInput(SDL_GRAB_ON);
+	SDL_WarpMouse(0, 0);	/* move the pointer to the top-left corner when the grab starts */
+    }
+    else
+    {
+	SDL_WM_GrabInput(SDL_GRAB_OFF);
+    }
+}
+
+static void handle_inhibit (void)
+{   /* Hotkey handler (F12+i in arHotKeys): toggles the IHF_SCROLLLOCK frame-inhibit flag */
+    toggle_inhibit_frame (IHF_SCROLLLOCK);
+}
+
+
+static void handle_interpol (void)
+{   /* Hotkey handler (F12+p): step currprefs.sound_interpol through 0 -> 1 -> 2 -> 0 */
+    switch (currprefs.sound_interpol)
+    {
+    case 0:
+	currprefs.sound_interpol = 1;
+	printf ("Interpol on: rh\n");
+	break;
+    case 1:
+	currprefs.sound_interpol = 2;
+	printf ("Interpol on: crux\n");
+	break;
+    default:
+	currprefs.sound_interpol = 0;
+	printf ("Interpol off\n");
+	break;
+    }
+}
+
+static void framerate_up (void)
+{   /* Hotkey handler (F12+keypad plus): request gfx_framerate + 1 through changed_prefs, capped at 20 */
+    if (currprefs.gfx_framerate < 20)
+	changed_prefs.gfx_framerate = currprefs.gfx_framerate + 1;
+}
+
+static void framerate_down (void)
+{   /* Hotkey handler (F12+keypad minus): request gfx_framerate - 1 through changed_prefs, never below 1 */
+    if (currprefs.gfx_framerate > 1)
+	changed_prefs.gfx_framerate = currprefs.gfx_framerate - 1;
+}
+
+void target_save_options (FILE *f, struct uae_prefs *p)
+{   /* Intentionally empty: nothing is written to f by this backend */
+}
+
+int target_parse_option (struct uae_prefs *p, char *option, char *value)
+{   /* Returns 0 unconditionally; option and value are not examined */
+    return 0;
+}
diff -urN src-0.8.22/src/serial.c src-0.8.22-mmu/src/serial.c
--- src-0.8.22/src/serial.c	2001-12-17 19:38:38.000000000 +0100
+++ src-0.8.22-mmu/src/serial.c	2003-07-25 12:12:12.000000000 +0200
@@ -43,7 +43,7 @@
 #define O_NONBLOCK O_NDELAY
 #endif
 
-#define SERIALDEBUG 1 /* 0, 1, 2 3 */
+#define SERIALDEBUG 0 /* 0, 1, 2 3 */
 #define MODEMTEST   0 /* 0 or 1 */
 
 void serial_open (void);
@@ -80,18 +80,25 @@
 
 int sd = -1;
 
+static int ser_log = 0;
+
 #ifdef POSIX_SERIAL
     struct termios tios;
 #endif
 
-uae_u16 serper=0,serdat;
+uae_u16 serper=0, serdat = 0;
 
 void SERPER (uae_u16 w)
 {
     int baud=0, pspeed;
 
-    if (!currprefs.use_serial)
+    if (!currprefs.use_serial)	{
+        ser_log = (w == 0x170);	/* enable logging if we are using 9600 BAUD */
+#if SERIALDEBUG > 1
+	write_log("SERPER: %x\n", w);
+#endif
 	return;
+    }
 
 #if defined POSIX_SERIAL
     if (serper == w)  /* don't set baudrate if it's already ok */
@@ -178,12 +185,9 @@
 {
     unsigned char z;
 
-    if (!currprefs.use_serial)
-	return;
-
     z = (unsigned char)(w&0xff);
 
-    if (currprefs.serial_demand && !dtr) {
+    if (currprefs.serial_demand && !dtr && !ser_log) {
 	if (!isbaeh) {
 	    write_log("SERDAT: Baeh.. Your software needs SERIAL_ALWAYS to work properly.\n");
 	    isbaeh=1;
@@ -206,8 +210,6 @@
 
 uae_u16 SERDATR (void)
 {
-    if (!currprefs.use_serial)
-	return 0;
 #if SERIALDEBUG > 2
     write_log ("SERDATR: read 0x%04x\n", serdat);
 #endif
@@ -284,10 +286,14 @@
 
 void serial_flush_buffer(void)
 {
-    if (serdev == 1) {
-	if (outlast) {
+    if (serdev == 1 || ser_log) {
+        if (outlast) {
 	    if (sd != 0) {
-		write (sd, outbuf, outlast);
+	        write (sd, outbuf, outlast);
+	    }
+	    if (ser_log)	{
+	        /* outbuf can be completely full here (SERDAT flushes at sizeof outbuf), so bound the print instead of storing a NUL past its end. */
+		write_log("SER: %.*s\n", outlast, outbuf);
 	    }
 	}
 	outlast = 0;
diff -urN src-0.8.22/src/serial.c~ src-0.8.22-mmu/src/serial.c~
--- src-0.8.22/src/serial.c~	1970-01-01 01:00:00.000000000 +0100
+++ src-0.8.22-mmu/src/serial.c~	2003-07-25 12:11:11.000000000 +0200
@@ -0,0 +1,419 @@
+ /*
+  * UAE - The Un*x Amiga Emulator
+  *
+  *  Serial Line Emulation
+  *
+  * (c) 1996, 1997 Stefan Reinauer <stepan@linux.de>
+  * (c) 1997 Christian Schmitt <schmitt@freiburg.linux.de>
+  *
+  */
+
+#include "sysconfig.h"
+#include "sysdeps.h"
+
+#include "config.h"
+#include "options.h"
+#include "uae.h"
+#include "memory.h"
+#include "custom.h"
+#include "newcpu.h"
+#include "cia.h"
+
+#undef POSIX_SERIAL
+/* Some more or less good way to determine whether we can safely compile in
+ * the serial stuff. I'm certain it breaks compilation on some systems. */
+#if defined HAVE_SYS_TERMIOS_H && defined HAVE_POSIX_OPT_H && defined HAVE_SYS_IOCTL_H && defined HAVE_TCGETATTR
+#define POSIX_SERIAL
+#endif
+
+#ifdef POSIX_SERIAL
+#include <termios.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#endif
+
+#if !defined B300 || !defined B1200 || !defined B2400 || !defined B4800 || !defined B9600
+#undef POSIX_SERIAL
+#endif
+#if !defined B19200 || !defined B57600 || !defined B115200 || !defined B230400
+#undef POSIX_SERIAL
+#endif
+
+#ifndef O_NONBLOCK
+#define O_NONBLOCK O_NDELAY
+#endif
+
+#define SERIALDEBUG 0 /* 0, 1, 2 3 */
+#define MODEMTEST   0 /* 0 or 1 */
+
+void serial_open (void);
+void serial_close (void);
+void serial_init (void);
+void serial_exit (void);
+
+void serial_dtr_on (void);
+void serial_dtr_off (void);
+
+void serial_flush_buffer (void);
+static int serial_read (char *buffer);
+
+int serial_readstatus (void);
+uae_u16 serial_writestatus (int, int);
+
+uae_u16 SERDATR (void);
+
+int  SERDATS (void);
+void  SERPER (uae_u16 w);
+void  SERDAT (uae_u16 w);
+
+static char inbuf[1024], outbuf[1024];
+static int inptr, inlast, outlast;
+
+int waitqueue=0,
+    carrier=0,
+    serdev=0,
+    dsr=0,
+    dtr=0,
+    isbaeh=0,
+    doreadser=0,
+    serstat=-1;
+
+int sd = -1;
+
+static int ser_log = 0;
+
+#ifdef POSIX_SERIAL
+    struct termios tios;
+#endif
+
+uae_u16 serper=0, serdat = 0;
+
+void SERPER (uae_u16 w) /* Apply serial period value w: toggles log capture when serial is disabled, else sets the host baud rate. */
+{
+    int baud=0, pspeed;
+
+	if (!currprefs.use_serial)	{
+		ser_log = (w == 0x170);	/* enable logging if we are using 9600 BAUD */
+#if SERIALDEBUG > 1
+		write_log("SERPER: %x\n", w);
+#endif
+	return; /* serial emulation disabled: no device to configure */
+	}
+
+#if defined POSIX_SERIAL
+    if (serper == w)  /* don't set baudrate if it's already ok */
+	return;
+    serper=w; /* remembered for the early-out above */
+
+    if (w&0x8000)
+	write_log ("SERPER: 9bit transmission not implemented.\n");
+
+    switch (w & 0x7fff) {
+     /* These values should be calculated by the current
+      * color clock value (NTSC/PAL). But this solution is
+      * easy and it works.
+      */
+
+     case 0x2e9b:
+     case 0x2e14: baud=300; pspeed=B300; break;
+     case 0x170a:
+     case 0x0b85: baud=1200; pspeed=B1200; break;
+     case 0x05c2:
+     case 0x05b9: baud=2400; pspeed=B2400; break;
+     case 0x02e9:
+     case 0x02e1: baud=4800; pspeed=B4800; break;
+     case 0x0174:
+     case 0x0170: baud=9600; pspeed=B9600; break;
+     case 0x00b9:
+     case 0x00b8: baud=19200; pspeed=B19200; break;
+     case 0x005c:
+     case 0x005d: baud=38400; pspeed=B38400; break;
+     case 0x003d: baud=57600; pspeed=B57600; break;
+     case 0x001e: baud=115200; pspeed=B115200; break;
+     case 0x000f: baud=230400; pspeed=B230400; break;
+     default:
+	write_log ("SERPER: unsupported baudrate (0x%04x) %d\n",w&0x7fff,
+		 (unsigned int)(3579546.471/(double)((w&0x7fff)+1)));  return;
+    }
+
+    /* Only access hardware when we own it */
+    if (serdev == 1) {
+	if (tcgetattr (sd, &tios) < 0) {
+	    write_log ("SERPER: TCGETATTR failed\n");
+	    return;
+	}
+
+	if (cfsetispeed (&tios, pspeed) < 0) {    /* set serial input speed */
+	    write_log ("SERPER: CFSETISPEED (%d bps) failed\n", baud);
+	    return;
+	}
+	if (cfsetospeed (&tios, pspeed) < 0) {    /* set serial output speed */
+	    write_log ("SERPER: CFSETOSPEED (%d bps) failed\n", baud);
+	    return;
+	}
+
+	if (tcsetattr (sd, TCSADRAIN, &tios) < 0) {
+	    write_log ("SERPER: TCSETATTR failed\n");
+	    return;
+	}
+    }
+#endif
+
+#if SERIALDEBUG > 0
+    if (serdev == 1)
+	write_log ("SERPER: baudrate set to %d bit/sec\n", baud); /* baud stays 0 when POSIX_SERIAL is not defined */
+#endif
+}
+
+/* Not (fully) implemented yet:
+ *
+ *  -  Something's wrong with the Interrupts.
+ *     (NComm works, TERM does not. TERM switches to a
+ *     blind mode after a connect and waits for the end
+ *     of an asynchronous read before switching blind
+ *     mode off again. It never gets there on UAE :-< )
+ *
+ *  -  RTS/CTS handshake, this is not really necessary,
+ *     because you can use RTS/CTS "outside" without
+ *     passing it through to the emulated Amiga
+ *
+ *  -  ADCON-Register ($9e write, $10 read) Bit 11 (UARTBRK)
+ *     (see "Amiga Intern", pg 246)
+ */
+
+void SERDAT (uae_u16 w)
+{
+    /* Queue the low byte of w for output, then set TBE in serdat and intreq. */
+    const unsigned char low_byte = (unsigned char)(w & 0xff);
+
+    /* On-demand mode with DTR low and no log capture: the byte is dropped. */
+    if (currprefs.serial_demand && !dtr && !ser_log) {
+	if (!isbaeh) {	/* warn only once */
+	    write_log("SERDAT: Baeh.. Your software needs SERIAL_ALWAYS to work properly.\n");
+	    isbaeh = 1;
+	}
+	return;
+    }
+
+    outbuf[outlast] = low_byte;
+    outlast += 1;
+    if (outlast == sizeof outbuf)
+	serial_flush_buffer();	/* buffer is full */
+
+#if SERIALDEBUG > 2
+    write_log ("SERDAT: wrote 0x%04x\n", w);
+#endif
+
+    serdat |= 0x2000;	/* Set TBE in the SERDATR ... */
+    intreq |= 1;	/* ... and in INTREQ register */
+}
+
+uae_u16 SERDATR (void) /* Return the current serdat value and clear waitqueue. */
+{
+#if SERIALDEBUG > 2
+    write_log ("SERDATR: read 0x%04x\n", serdat);
+#endif
+    waitqueue = 0; /* SERDATS only fetches a new byte while waitqueue is not 1 */
+    return serdat;
+}
+
+int SERDATS (void)
+{
+    unsigned char rx;
+
+    /* Nothing to poll while no device is open. */
+    if (serdev == 0)
+	return 0;
+
+    /* A byte is still waiting to be read through SERDATR: raise the interrupt again. */
+    if (waitqueue == 1) {
+	intreq |= 0x0800;
+	return 1;
+    }
+
+    if (serial_read ((char *)&rx) != 1)
+	return 0;
+
+    waitqueue = 1;
+    serdat = 0x4100 | (((unsigned int)rx) & 0xff);	/* RBF and STP set, plus the data byte */
+    intreq |= 0x0800;	/* Set RBF flag (Receive Buffer full) */
+#if SERIALDEBUG > 1
+    write_log ("SERDATS: received 0x%02x --> serdat==0x%04x\n",
+	     (unsigned int)rx, (unsigned int)serdat);
+#endif
+    return 1;
+}
+
+void serial_dtr_on(void) /* Record DTR as on; in on-demand mode this also opens the serial device. */
+{
+#if SERIALDEBUG > 0
+    write_log ("DTR on.\n");
+#endif
+    dtr=1;
+
+    if (currprefs.serial_demand)
+	serial_open ();
+}
+
+void serial_dtr_off(void) /* Record DTR as off; in on-demand mode this also closes the serial device. */
+{
+#if SERIALDEBUG > 0
+    write_log ("DTR off.\n");
+#endif
+    dtr=0;
+    if (currprefs.serial_demand)
+	serial_close ();
+}
+
+static int serial_read (char *buffer)
+{
+    /* Fetch one byte into *buffer; returns 1 on success, 0 if none is available. */
+    if (inptr >= inlast) {
+	/* Input buffer drained: it is only refilled while the device is open. */
+	if (serdev != 1)
+	    return 0;
+
+	inlast = read (sd, inbuf, sizeof inbuf);
+	inptr = 0;
+	if (inlast <= 0)	/* read failed or returned no data */
+	    return 0;
+    }
+
+    *buffer = inbuf[inptr];
+    inptr++;
+    return 1;
+}
+
+void serial_flush_buffer(void)	/* send pending output to the device and/or the log, then reset the buffer */
+{
+    if (serdev == 1 || ser_log) {
+	if (outlast) {
+	    if (serdev == 1 && sd >= 0) {	/* only touch the device while we own it; sd is -1 when nothing is open */
+		write (sd, outbuf, outlast);
+	    }
+	    if (ser_log) {
+		/* outbuf can be completely full here (SERDAT flushes at sizeof outbuf), so bound the print instead of storing a NUL past its end. */
+		write_log("SER: %.*s\n", outlast, outbuf);
+	    }
+	}
+	outlast = 0;
+    } else {
+	outlast = 0;
+	inptr = 0;
+	inlast = 0;
+    }
+}
+
+int serial_readstatus(void) /* Poll the modem lines and mirror carrier/DSR changes into ciabpra; returns the raw status (0 without POSIX_SERIAL). */
+{
+    int status = 0;
+
+#ifdef POSIX_SERIAL
+    ioctl (sd, TIOCMGET, &status); /* NOTE(review): result not checked; status stays 0 if the call does not fill it */
+
+    if (status & TIOCM_CAR) {
+	if (!carrier) {
+	    ciabpra |= 0x20; /* Push up Carrier Detect line */
+	    carrier = 1;
+#if SERIALDEBUG > 0
+	    write_log ("Carrier detect.\n");
+#endif
+	}
+    } else {
+	if (carrier) {
+	    ciabpra &= ~0x20; /* carrier went away: clear the same bit */
+	    carrier = 0;
+#if SERIALDEBUG > 0
+	    write_log ("Carrier lost.\n");
+#endif
+	}
+    }
+
+    if (status & TIOCM_DSR) {
+	if (!dsr) {
+	    ciabpra |= 0x08; /* DSR ON */
+	    dsr = 1;
+	}
+    } else {
+	if (dsr) {
+	    ciabpra &= ~0x08; /* DSR off: clear the same bit */
+	    dsr = 0;
+	}
+    }
+#endif
+    return status;
+}
+
+uae_u16 serial_writestatus (int old, int nw)
+{
+    /* Bits that differ between the old and the new value. */
+    const int flipped = old ^ nw;
+
+    /* Bit 0x80 going 1->0 switches DTR on, 0->1 switches it off. */
+    if (flipped & 0x80) {
+	if (nw & 0x80) serial_dtr_off(); else serial_dtr_on();
+    }
+    if (flipped & 0x40)
+	write_log ("RTS %s.\n", (nw & 0x40) ? "set" : "cleared");
+    if (flipped & 0x10)
+	write_log ("CTS %s.\n", (nw & 0x10) ? "set" : "cleared");
+    return nw; /* This value could also be changed here */
+}
+
+void serial_open(void)	/* open currprefs.sername and, with POSIX_SERIAL, put the tty into raw mode; no-op if already open */
+{
+    if (serdev == 1)
+	return;
+
+    if ((sd = open (currprefs.sername, O_RDWR|O_NONBLOCK|O_BINARY, 0)) < 0) {
+	write_log ("Error: Could not open Device %s\n", currprefs.sername);
+	return;
+    }
+
+    serdev = 1;
+
+#ifdef POSIX_SERIAL
+    if (tcgetattr (sd, &tios) < 0) {		/* Initialize Serial tty */
+	write_log ("Serial: TCGETATTR failed\n");
+	return;
+    }
+    cfmakeraw (&tios);
+
+#if !MODEMTEST	/* MODEMTEST is always defined (0 or 1), so test its value rather than its existence */
+    tios.c_cflag &= ~CRTSCTS; /* Disable RTS/CTS */
+#else
+    tios.c_cflag |= CRTSCTS; /* Enabled for testing modems */
+#endif
+
+    if (tcsetattr (sd, TCSADRAIN, &tios) < 0) {
+	write_log ("Serial: TCSETATTR failed\n");
+	return;
+    }
+#endif
+}
+
+void serial_close (void)	/* close the serial device if it is open and mark it as not owned */
+{
+    if (sd >= 0) close (sd);
+    sd = -1;	/* forget the descriptor so a repeated serial_close() cannot close a stale or reused fd */
+    serdev = 0;
+}
+
+void serial_init (void) /* Start-up hook: does nothing unless currprefs.use_serial is set. */
+{
+    if (!currprefs.use_serial)
+	return;
+
+    if (!currprefs.serial_demand) /* not on-demand: open the device right away */
+	serial_open ();
+
+    serdat = 0x2000; /* the same bit SERDAT sets as TBE */
+    return;
+}
+
+void serial_exit (void) /* Shutdown hook: closes the device and clears dtr. */
+{
+    serial_close ();	/* serial_close can always be called because it	*/
+    dtr = 0;		/* just closes *opened* filehandles which is ok	*/
+    return;		/* when exiting.				*/
+}
