Browse Source

Add likely macro, force-no-inline on some sim stuff

Makes compilation a lot faster, and also makes runtime a lot faster on
my computer. This is probably due to fragile inlining behavior on the big
switch statements, but it is still worth doing. Building with -Os and
without -flto on clang gives a slightly smaller binary that's just as
fast, but I don't know how fragile that result is -- it may just happen
to be good today. Need to set up a workflow for going through and looking
at the output asm on Linux.
master
cancel 7 years ago
parent
commit
74e5f814dd
  1. 6
      base.h
  2. 38
      sim.c

6
base.h

@ -34,6 +34,12 @@
#define ORCA_ASSUME_ALIGNED(_ptr, _alignment) (_ptr) #define ORCA_ASSUME_ALIGNED(_ptr, _alignment) (_ptr)
#endif #endif
#if defined(__GNUC__) || defined(__clang__)
#define ORCA_LIKELY(_x) __builtin_expect(_x, 1)
#else
#define ORCA_LIKELY(_x) (_x)
#endif
#define ORCA_Y_MAX UINT16_MAX #define ORCA_Y_MAX UINT16_MAX
#define ORCA_X_MAX UINT16_MAX #define ORCA_X_MAX UINT16_MAX

38
sim.c

@ -2,16 +2,6 @@
#include "mark.h" #include "mark.h"
#include "sim.h" #include "sim.h"
#if 0
ORCA_FORCE_STATIC_INLINE void stupid_memcpy(char* restrict dest,
char* restrict src, size_t sz) {
for (size_t i = 0; i < sz; ++i) {
dest[i] = src[i];
}
}
#define ORCA_MEMCPY(_dest, _src, _sz) memcpy(_dest, _src, _sz)
#endif
//////// Utilities //////// Utilities
static Glyph const indexed_glyphs[] = { static Glyph const indexed_glyphs[] = {
@ -55,18 +45,18 @@ static inline Glyph glyphs_mod(Glyph a, Glyph b) {
// todo check if these inlines are actually being inlined -- might be bad, // todo check if these inlines are actually being inlined -- might be bad,
// should probably mark them not inlined // should probably mark them not inlined
static inline bool oper_has_neighboring_bang(Gbuffer gbuf, Usz h, Usz w, Usz y, static bool oper_has_neighboring_bang(Gbuffer gbuf, Usz h, Usz w, Usz y,
Usz x) { Usz x) {
return gbuffer_peek_relative(gbuf, h, w, y, x, 0, 1) == '*' || return gbuffer_peek_relative(gbuf, h, w, y, x, 0, 1) == '*' ||
gbuffer_peek_relative(gbuf, h, w, y, x, 0, -1) == '*' || gbuffer_peek_relative(gbuf, h, w, y, x, 0, -1) == '*' ||
gbuffer_peek_relative(gbuf, h, w, y, x, 1, 0) == '*' || gbuffer_peek_relative(gbuf, h, w, y, x, 1, 0) == '*' ||
gbuffer_peek_relative(gbuf, h, w, y, x, -1, 0) == '*'; gbuffer_peek_relative(gbuf, h, w, y, x, -1, 0) == '*';
} }
static inline void oper_move_relative_or_explode(Gbuffer gbuf, Mbuffer mbuf, static ORCA_FORCE_NO_INLINE void
Usz height, Usz width, oper_move_relative_or_explode(Gbuffer gbuf, Mbuffer mbuf, Usz height, Usz width,
Glyph moved, Usz y, Usz x, Glyph moved, Usz y, Usz x, Isz delta_y,
Isz delta_y, Isz delta_x) { Isz delta_x) {
Isz y0 = (Isz)y + delta_y; Isz y0 = (Isz)y + delta_y;
Isz x0 = (Isz)x + delta_x; Isz x0 = (Isz)x + delta_x;
if (y0 >= (Isz)height || x0 >= (Isz)width || y0 < 0 || x0 < 0) { if (y0 >= (Isz)height || x0 >= (Isz)width || y0 < 0 || x0 < 0) {
@ -96,16 +86,18 @@ typedef struct {
} Oper_bank_read_params; } Oper_bank_read_params;
// static may cause warning if programmer doesn't use bank storage // static may cause warning if programmer doesn't use bank storage
void oper_bank_store(Oper_bank_write_params* bank_params, Usz width, Usz y, void ORCA_FORCE_NO_INLINE oper_bank_store(Oper_bank_write_params* bank_params,
Usz x, I32* restrict vals, Usz num_vals) { Usz width, Usz y, Usz x,
I32* restrict vals, Usz num_vals) {
assert(num_vals > 0); assert(num_vals > 0);
Usz index = y * width + x; Usz index = y * width + x;
assert(index < ORCA_BANK_INDEX_MAX); assert(index < ORCA_BANK_INDEX_MAX);
bank_params->size = bank_params->size =
bank_append(bank_params->bank, bank_params->size, index, vals, num_vals); bank_append(bank_params->bank, bank_params->size, index, vals, num_vals);
} }
Usz oper_bank_load(Oper_bank_read_params* bank_params, Usz width, Usz y, Usz x, Usz ORCA_FORCE_NO_INLINE oper_bank_load(Oper_bank_read_params* bank_params,
I32* restrict out_vals, Usz out_count) { Usz width, Usz y, Usz x,
I32* restrict out_vals, Usz out_count) {
Usz index = y * width + x; Usz index = y * width + x;
assert(index < ORCA_BANK_INDEX_MAX); assert(index < ORCA_BANK_INDEX_MAX);
return bank_read(bank_params->bank->data, bank_params->size, return bank_read(bank_params->bank->data, bank_params->size,
@ -166,7 +158,7 @@ Usz usz_clamp(Usz val, Usz min, Usz max) {
(void)Tick_number; \ (void)Tick_number; \
(void)bank_params; (void)bank_params;
#define OPER_PHASE_SPEC static inline #define OPER_PHASE_SPEC static ORCA_FORCE_NO_INLINE
#define BEGIN_SOLO_PHASE_0(_oper_name) \ #define BEGIN_SOLO_PHASE_0(_oper_name) \
OPER_PHASE_SPEC void oper_phase0_##_oper_name(OPER_PHASE_0_COMMON_ARGS) { \ OPER_PHASE_SPEC void oper_phase0_##_oper_name(OPER_PHASE_0_COMMON_ARGS) { \
@ -829,7 +821,7 @@ static void sim_phase_0(Gbuffer gbuf, Mbuffer mbuf, Usz height, Usz width,
Glyph* glyph_row = gbuf + iy * width; Glyph* glyph_row = gbuf + iy * width;
for (Usz ix = 0; ix < width; ++ix) { for (Usz ix = 0; ix < width; ++ix) {
Glyph glyph_char = glyph_row[ix]; Glyph glyph_char = glyph_row[ix];
if (glyph_char == '.') if (ORCA_LIKELY(glyph_char == '.'))
continue; continue;
U8 cell_flags = mbuffer_peek(mbuf, height, width, iy, ix) & U8 cell_flags = mbuffer_peek(mbuf, height, width, iy, ix) &
(Mark_flag_lock | Mark_flag_sleep); (Mark_flag_lock | Mark_flag_sleep);
@ -847,7 +839,7 @@ static void sim_phase_1(Gbuffer gbuf, Mbuffer mbuf, Usz height, Usz width,
Glyph* glyph_row = gbuf + iy * width; Glyph* glyph_row = gbuf + iy * width;
for (Usz ix = 0; ix < width; ++ix) { for (Usz ix = 0; ix < width; ++ix) {
Glyph glyph_char = glyph_row[ix]; Glyph glyph_char = glyph_row[ix];
if (glyph_char == '.') if (ORCA_LIKELY(glyph_char == '.'))
continue; continue;
if (mbuffer_peek(mbuf, height, width, iy, ix) & if (mbuffer_peek(mbuf, height, width, iy, ix) &
(Mark_flag_lock | Mark_flag_sleep)) (Mark_flag_lock | Mark_flag_sleep))

Loading…
Cancel
Save