1. Branchless counter
    This trades a compare-and-branch for a single bitwise AND instruction; the technique is commonly used in embedded systems for accessing arrays by index.
    The downside is that the array size must be a power of 2, so that the bit mask is valid and the counter/index rolls over correctly.

    const unsigned int COUNTER_MAX = 0x100;
    unsigned int counter = 0;

    /* Branching variant: step the counter once and wrap it with an explicit compare. */
    void func(void)
    {
    	unsigned int next = counter + 1;

    	if (next == COUNTER_MAX) // reached 0x100, so the 0..0xFF range starts over
    	{
    		next = 0;
    	}

    	counter = next;
    }
    const unsigned int COUNTER_MAX = 0x100;
    unsigned int counter = 0;

    /* Branchless variant: step the counter once and wrap it with a bit mask instead of a compare. */
    void func(void)
    {
    	const unsigned int wrap_mask = COUNTER_MAX - 1; // 0xFF; only a valid mask because 0x100 is a power of 2

    	counter = (counter + 1) & wrap_mask; // 0x100 is masked down to 0
    }
    /* Must be a power of 2 so that (ARRAY_SIZE - 1) is a valid index mask. */
    #define ARRAY_SIZE 0x100
    int array[ARRAY_SIZE];

    /*
     * Print the current array element, then advance the index.
     * The index wraps from ARRAY_SIZE - 1 back to 0 through a bit mask
     * rather than a compare-and-branch.
     */
    void func(void)
    {
    	static unsigned int index = 0; // unsigned: only ever masked and used as a subscript

    	printf("%d\n", array[index]);
    	index++;
    	index &= (ARRAY_SIZE - 1); // 0x100 is masked down to 0
    }

    If used in synchronously executed code (loop, state machine), the branch misprediction delay can be reduced.

  2. Be careful when working with 64-bit variables on 32-bit architectures (and not only there)
    #include <Windows.h>
    #include <stdint.h>
    
    /*
     * Clear in *source every bit that is set in mask.
     *
     * Pitfall being demonstrated: mask is only 32 bits wide, so (where int is
     * 32 bits) ~mask is evaluated at 32-bit width and then zero-extended for
     * the 64-bit AND. The upper 32 bits of *source are therefore cleared too.
     */
    void clear_bits(uint64_t * source, uint32_t mask)
    {
        uint64_t value = *source;

        value &= ~mask; // complement is taken before widening to 64 bits
        *source = value;
    }
    
    /*
     * Clear in dst every bit that is set in bits. Being a macro, the complement
     * is evaluated in the type of the argument as written, so the result
     * depends on that type (e.g. int vs. unsigned int literal).
     */
    #define CLEAR_BITS(dst, bits) ((dst) &= ~(bits))
    
    /*
     * Runs the three bit-clearing variants on an all-ones 64-bit value.
     * The "result" comments assume int is 32 bits wide.
     */
    int main()
    {
        // Function with a uint32_t parameter: ~mask is 32 bits wide, then zero-extended.
        uint64_t source = 0xFFFFFFFFFFFFFFFF;
        clear_bits(&source, 0xff00); // result 0x00000000ffff00ff

        // Macro with an int literal: ~0xff00 is a negative int, sign-extended to 64 bits.
        source = 0xFFFFFFFFFFFFFFFF;
        CLEAR_BITS(source, 0xff00);  // result 0xffffffffffff00ff

        // Macro with an unsigned literal: ~0xff00u is a 32-bit unsigned value, zero-extended.
        source = 0xFFFFFFFFFFFFFFFF;
        CLEAR_BITS(source, 0xff00u); // result 0x00000000ffff00ff

        system("pause"); // keep the console window open
        return 0;
    }

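    One solution is to use a suffix such as ULL (unsigned long long) so that constant values are interpreted correctly. For the function variant a suffix is not enough, because the argument is converted to the 32-bit parameter type; there the mask parameter itself should be declared as uint64_t.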

  3. Be careful with unused pins in embedded devices.
    Input pins are generally in a high-impedance state (Hi-Z). Very often the default pin mode is input, and when the LSI (low-speed internal clock) is in use, extra electromagnetic noise can be introduced because shoot-through current flows internally, and undefined behavior might occur. Unused pins should be pulled down to GND, or the GPIO clock source should be disabled.