Author : Olivier Langlois To use the assembler scan routines for VC++, modify the following files: In scan.cpp: #if defined(PROC_INTEL) && !defined(NO_ASSEMBLER) # if defined(COMP_GCC) # include "system/i386/scanln8.h" # elif defined(COMP_VC) # include "system/i386/scanln8vc.h" #endif #endif In scan16.cpp: #if defined(PROC_INTEL) && !defined(NO_ASSEMBLER) #if defined(COMP_GCC) # include "system/i386/scanln16.h" #elif defined(COMP_VC) # include "system/i386/scanln16vc.h" #endif #endif In system.h: #if defined (PROC_INTEL) && !defined (NO_ASSEMBLER) #if defined (COMP_GCC) || defined(COMP_VC) # define DO_MMX 1 #endif #endif I am sure that there is still a lot of room for improvement, but I have observed approximately a 10% gain in speed, which is not negligible. Here are some numbers from the VC++ profiler: DEPTH = 8, RENDER = Software, NO_ASSEMBLER: Func Func+Child Hit Time % Time % Count Function --------------------------------------------------------- 22706.566 51.3 22706.566 51.3 671599 Scan::draw_scanline_map(int,unsigned char *,unsigned long *,float,float,float) (scan.obj) 3033.861 6.9 3033.873 6.9 389 SysGraphics2D::Print(class csRect *) (win32.obj) 1701.028 3.8 1701.028 3.8 410 TextureCache::create_lighted_texture_true_rgb(class PolyTexture *,class Textures *) (tcache.obj) 1672.422 3.8 1672.422 3.8 1181479 blueloop(int) (inv_cmap.obj) 1567.469 3.5 1567.469 3.5 208212 Scan::draw_scanline_z_buf_map(int,unsigned char *,unsigned long *,float,float,float) (scan.obj) 1493.178 3.4 28622.648 64.7 14181 csGraphics3D::DrawPolygon(class G3DPolygon &) (graph3d.obj) 1013.807 2.3 1196.895 2.7 1 _DirectDrawEnumerateA@8 (ddraw.dll) 956.838 2.2 956.838 2.2 8476 csSystemDriver::Time(void) (timing.obj) 791.981 1.8 791.981 1.8 36 csSystemDriver::printf(char *,...) 
(printf.obj) 742.081 1.7 742.081 1.7 5 _DirectDrawCreate@12 (ddraw.dll) 685.792 1.6 685.792 1.6 15795 Scan::draw_scanline_map_alpha1(int,unsigned char *,unsigned long *,float,float,float) (scan.obj) DEPTH = 8, RENDER = Software, WITH_ASSEMBLER: Func Func+Child Hit Time % Time % Count Function --------------------------------------------------------- 14244.959 38.4 14244.959 38.4 720479 Scan::mmx_draw_scanline_map(int,unsigned char *,unsigned long *,float,float,float) (scan.obj) 3182.042 8.6 3182.066 8.6 401 SysGraphics2D::Print(class csRect *) (win32.obj) 1616.073 4.4 19645.886 53.0 14896 csGraphics3D::DrawPolygon(class G3DPolygon &) (graph3d.obj) 1607.906 4.3 1607.906 4.3 423 TextureCache::create_lighted_texture_true_rgb(class PolyTexture *,class Textures *) (tcache.obj) 1598.746 4.3 1598.746 4.3 5 _DirectDrawCreate@12 (ddraw.dll) 1189.862 3.2 2350.261 6.3 1 _DirectDrawEnumerateA@8 (ddraw.dll) 1170.533 3.2 1170.533 3.2 205046 Scan::draw_scanline_z_buf_map(int,unsigned char *,unsigned long *,float,float,float) (scan.obj) 1122.587 3.0 1122.587 3.0 1181479 blueloop(int) (inv_cmap.obj) 993.361 2.7 993.361 2.7 37 csSystemDriver::printf(char *,...) 
(printf.obj) 987.924 2.7 987.924 2.7 8737 csSystemDriver::Time(void) (timing.obj) 595.051 1.6 3280.694 8.8 1 DirectDetection::checkDevices(void) (dirdetec.obj) 576.940 1.6 576.940 1.6 16818 Scan::draw_scanline_map_alpha1(int,unsigned char *,unsigned long *,float,float,float) (scan.obj) DEPTH = 16, RENDER = Software, NO_ASSEMBLER: Func Func+Child Hit Time % Time % Count Function --------------------------------------------------------- 28416.141 47.1 28416.141 47.1 703780 Scan16::draw_scanline_map(int,unsigned char *,unsigned long *,float,float,float) (scan16.obj) 10081.547 16.7 10081.547 16.7 387 SysGraphics2D::Print(class csRect *) (win32.obj) 2897.373 4.8 2897.373 4.8 802 TextureCache16::create_lighted_texture_true_rgb(class PolyTexture *,class Textures *) (tcache16.obj) 2504.236 4.1 2504.236 4.1 5 _DirectDrawCreate@12 (ddraw.dll) 1605.533 2.7 1605.533 2.7 197223 Scan16::draw_scanline_z_buf_map(int,unsigned char *,unsigned long *,float,float,float) (scan16.obj) 1453.314 2.4 35858.172 59.4 14268 csGraphics3D::DrawPolygon(class G3DPolygon &) (graph3d.obj) 1032.314 1.7 2478.984 4.1 1 _DirectDrawEnumerateA@8 (ddraw.dll) 1027.182 1.7 1027.182 1.7 16113 Scan16::draw_scanline_map_alpha25(int,unsigned char *,unsigned long *,float,float,float) (scan16.obj) 1027.182 1.7 1027.182 1.7 16113 Scan16::draw_scanline_map_alpha50(int,unsigned char *,unsigned long *,float,float,float) (scan16.obj) 1027.182 1.7 1027.182 1.7 16113 Scan16::draw_scanline_map_alpha75(int,unsigned char *,unsigned long *,float,float,float) (scan16.obj) 948.218 1.6 948.218 1.6 8173 csSystemDriver::Time(void) (timing.obj) DEPTH = 16, RENDER = Software, WITH_ASSEMBLER: Func Func+Child Hit Time % Time % Count Function --------------------------------------------------------- 16768.674 36.3 16768.674 36.3 682158 Scan16::mmx_draw_scanline_map(int,unsigned char *,unsigned long *,float,float,float) (scan16.obj) 10400.137 22.5 10400.137 22.5 399 SysGraphics2D::Print(class csRect *) (win32.obj) 2806.457 6.1 
2806.457 6.1 756 TextureCache16::create_lighted_texture_true_rgb(class PolyTexture *,class Textures *) (tcache16.obj) 1898.455 4.1 1898.455 4.1 5 _DirectDrawCreate@12 (ddraw.dll) 1579.188 3.4 23978.102 51.9 14609 csGraphics3D::DrawPolygon(class G3DPolygon &) (graph3d.obj) 1194.294 2.6 1194.294 2.6 203888 Scan16::draw_scanline_z_buf_map(int,unsigned char *,unsigned long *,float,float,float) (scan16.obj) 1160.760 2.5 1160.760 2.5 17016 Scan16::draw_scanline_map_alpha50(int,unsigned char *,unsigned long *,float,float,float) (scan16.obj) 992.088 2.1 1839.727 4.0 1 _DirectDrawEnumerateA@8 (ddraw.dll) 956.137 2.1 956.137 2.1 8546 csSystemDriver::Time(void) (timing.obj) 535.641 1.2 535.641 1.2 326537 blueloop(int) (inv_cmap.obj) 475.616 1.0 475.616 1.0 1 csSystemDriver::printf_init(void) (printf.obj)