..

Windows Dynamic Loading and API Hashing with Rust

This post has been indefinitely postponed due to the removal of impl const and ~const from the standard library. Once the effects system has been rewritten, this post will be completed. You can follow updates on the implementation on Zulip. A very outdated (and questionable) implementation of this post can be found here.

Table of Contents

Basics of Dynamic Loading

Create a new binary crate with cargo new dynamic-loading, then a new module called bindings with this code:

use std::ffi::{c_char, c_void};

// Rust spellings of the Win32 typedefs used by the bindings in this module.
// Each alias is annotated with the C declaration chain it mirrors.

/// Opaque handle: `HANDLE -> PVOID -> void*`.
type HANDLE = *mut c_void;
/// Module handle: `HMODULE -> HINSTANCE -> HANDLE`.
type HMODULE = HANDLE;
/// Pointer to a NUL-terminated narrow string: `__terminated CONST CHAR*`.
type LPCSTR = *const c_char;
/// Pointer to a constant wide string: `CONST WCHAR *LPCWSTR`.
type LPCWSTR = *const u16;
/// Untyped procedure address: `int (CALLBACK* FARPROC)()`.
type FARPROC = *mut c_void;
/// Win32 boolean: `typedef int BOOL`.
type BOOL = i32;
/// Window handle: `typedef HANDLE HWND;`.
type HWND = HANDLE;
/// `typedef unsigned int UINT;`.
type UINT = u32;

// Foreign functions declared with the "system" ABI. The C prototype of each
// function is quoted in its doc comment.
extern "system" {
    /// C prototype: `HMODULE LoadLibraryW([in] LPCWSTR lpLibFileName);`
    pub fn LoadLibraryW(lpLibFileName: LPCWSTR) -> HMODULE;

    /// C prototype:
    /// `FARPROC GetProcAddress([in] HMODULE hModule, [in] LPCSTR lpProcName);`
    pub fn GetProcAddress(hModule: HMODULE, lpProcName: LPCSTR) -> FARPROC;

    /// C prototype: `BOOL FreeLibrary([in] HMODULE hLibModule);`
    pub fn FreeLibrary(hLibModule: HMODULE) -> BOOL;
}

// C prototype:
//   int MessageBoxW(HWND hWnd, LPCWSTR lpText, LPCWSTR lpCaption, UINT uType);
// The SAL annotations are left out of the prototype for brevity: every
// parameter is [in, optional] except uType, which is [in] and not optional.
/// Function-pointer type matching the `MessageBoxW` prototype above.
pub type MessageBoxW =
    unsafe extern "system" fn(hWnd: HWND, lpText: LPCWSTR, lpCaption: LPCWSTR, uType: UINT) -> i32;

Since I am opting to use the wide variants of the Windows API (it is 2024, after all), we will need a way to produce UTF-16 string literals. Luckily, the official windows crate offers an implementation; add it to a new module called literals:

/// Expands to a `*const u8` that points at the given literal with a `\0`
/// appended, i.e. a NUL-terminated UTF-8 string.
#[macro_export]
macro_rules! s {
    ($text:literal) => {{
        // Build the terminated string at compile time, then hand out its address.
        const TERMINATED: &str = ::core::concat!($text, "\0");
        TERMINATED.as_ptr()
    }};
}

/// A literal UTF-16 wide string with a trailing null terminator.
///
/// Expands to a `*const u16` pointing at a constant array that holds the
/// UTF-16 encoding of the literal followed by a single `0` code unit. The
/// conversion is done entirely in constant evaluation with the helper
/// functions `utf16_len` and `decode_utf8_char`, which must be reachable at
/// the crate root (`$crate::...`).
#[macro_export]
macro_rules! w {
    ($s:literal) => {
        {
            // Raw UTF-8 bytes of the literal.
            const INPUT: &[u8] = $s.as_bytes();
            // Number of UTF-16 code units, plus one slot for the terminator.
            const OUTPUT_LEN: usize = $crate::utf16_len(INPUT) + 1;
            const OUTPUT: &[u16; OUTPUT_LEN] = {
                // Zero-initialised, so the extra final slot is already the terminator.
                let mut buffer = [0; OUTPUT_LEN];
                let mut input_pos = 0;
                let mut output_pos = 0;
                // Decode one code point at a time until the decoder returns `None`.
                while let Some((mut code_point, new_pos)) = $crate::decode_utf8_char(INPUT, input_pos) {
                    input_pos = new_pos;
                    if code_point <= 0xffff {
                        // Fits in a single UTF-16 code unit.
                        buffer[output_pos] = code_point as u16;
                        output_pos += 1;
                    } else {
                        // Above U+FFFF: emit a surrogate pair. After subtracting
                        // 0x10000 the upper 10 bits go into the high surrogate
                        // (0xd800 base) and the lower 10 bits into the low
                        // surrogate (0xdc00 base).
                        code_point -= 0x10000;
                        buffer[output_pos] = 0xd800 + (code_point >> 10) as u16;
                        output_pos += 1;
                        buffer[output_pos] = 0xdc00 + (code_point & 0x3ff) as u16;
                        output_pos += 1;
                    }
                }
                // Move the finished array into the constant and yield a reference to it.
                &{ buffer }
            };
            OUTPUT.as_ptr()
        }
    };
}

pub use s;
pub use w;

/// Decodes the UTF-8 sequence that starts at `bytes[pos]`.
///
/// Returns the decoded scalar value together with the index of the first byte
/// after the sequence. Returns `None` when `pos` is at the end of `bytes`, or
/// when the sequence is truncated, starts with an invalid lead byte, contains
/// a bad continuation byte, is an overlong encoding, encodes a surrogate, or
/// lies above U+10FFFF.
#[doc(hidden)]
pub const fn decode_utf8_char(bytes: &[u8], mut pos: usize) -> Option<(u32, usize)> {
    if pos == bytes.len() {
        return None;
    }
    let lead = bytes[pos] as u32;
    pos += 1;

    // Classify the lead byte: how many continuation bytes follow, the payload
    // bits the lead byte contributes, and the inclusive scalar range that a
    // sequence of this length is allowed to encode.
    let (extra, mut scalar, min, max) = if lead <= 0x7f {
        return Some((lead, pos));
    } else if lead & 0xe0 == 0xc0 {
        (1, lead & 0x1f, 0x80, 0x7ff)
    } else if lead & 0xf0 == 0xe0 {
        (2, lead & 0x0f, 0x800, 0xffff)
    } else if lead & 0xf8 == 0xf0 {
        (3, lead & 0x07, 0x1_0000, 0x10_ffff)
    } else {
        // Stray continuation byte or a lead byte that UTF-8 never uses.
        return None;
    };

    // The whole sequence has to be present before any byte of it is read.
    if bytes.len() - pos < extra {
        return None;
    }

    let end = pos + extra;
    while pos < end {
        let continuation = bytes[pos] as u32;
        pos += 1;
        if continuation & 0xc0 != 0x80 {
            return None;
        }
        scalar = (scalar << 6) | (continuation & 0x3f);
    }

    // Reject overlong encodings, out-of-range values and UTF-16 surrogates.
    let is_surrogate = 0xd800 <= scalar && scalar <= 0xdfff;
    if scalar < min || scalar > max || is_surrogate {
        return None;
    }
    Some((scalar, pos))
}

/// Counts the UTF-16 code units needed to encode the UTF-8 text in `bytes`
/// (no terminator included).
///
/// Code points up to U+FFFF count as one unit, anything above as two. Counting
/// stops at the first position where `decode_utf8_char` returns `None`.
#[doc(hidden)]
pub const fn utf16_len(bytes: &[u8]) -> usize {
    let mut cursor = 0;
    let mut units = 0;
    loop {
        match decode_utf8_char(bytes, cursor) {
            Some((scalar, next)) => {
                units += if scalar > 0xffff { 2 } else { 1 };
                cursor = next;
            }
            None => return units,
        }
    }
}

Now with our FFI bindings in place we can change our main function to dynamically resolve MessageBoxW from User32.dll:

mod bindings;
mod literals;

use bindings::*;
use literals::*;

/// Loads `User32.dll`, resolves `MessageBoxW` by name, shows a message box and
/// then releases the library again.
fn main() {
    // SAFETY: `w!` yields a pointer to a NUL-terminated UTF-16 constant.
    let user32 = unsafe { LoadLibraryW(w!("User32.dll")) };
    assert!(!user32.is_null(), "User32 handle is null");

    // SAFETY: `user32` was checked to be non-null and `s!` yields a pointer
    // to a NUL-terminated string.
    let raw_proc = unsafe { GetProcAddress(user32, s!("MessageBoxW") as _) };
    assert!(!raw_proc.is_null(), "MessageBoxW pointer is null");

    // SAFETY: the address was looked up under the name `MessageBoxW`, and the
    // `MessageBoxW` alias mirrors that function's prototype.
    let show_message: MessageBoxW = unsafe { core::mem::transmute(raw_proc) };

    // SAFETY: the library stays loaded until the `FreeLibrary` call below.
    unsafe {
        show_message(core::ptr::null_mut(), w!("Hello, World!"), w!(""), 0);
        FreeLibrary(user32);
    }
}

Wrapping LoadLibrary

Create a new library module and add:

use core::ptr::NonNull;

/// Non-null module handle, as returned by a successful `LoadLibraryA` call.
type HModule = NonNull<c_void>;

/// Owning wrapper around a loaded module handle.
pub struct Library(HModule);

impl Library {
    /// Loads the module called `name`.
    ///
    /// Returns `None` when `name` contains an interior NUL byte (it cannot be
    /// passed on as a C string) or when `LoadLibraryA` returns a null handle.
    pub fn new(name: &str) -> Option<Self> {
        let name = std::ffi::CString::new(name).ok()?;
        // SAFETY: `name` is a valid NUL-terminated string that outlives the call.
        let handle = unsafe { LoadLibraryA(name.as_ptr()) };
        // `NonNull::new` maps a null handle to `None` without assuming anything
        // about the layout of `Option<Library>`, which a `transmute` would do.
        NonNull::new(handle).map(Self)
    }

    /// Returns a copy of the handle; `self` keeps ownership of the module.
    pub fn as_non_null(&self) -> HModule {
        self.0
    }

    /// Gives up ownership and returns the handle without releasing the module.
    ///
    /// The caller becomes responsible for eventually freeing the handle.
    pub fn into_non_null(self) -> HModule {
        // Letting `self` fall out of scope here would run its destructor and
        // hand the caller a handle that has already been released, so the
        // destructor is suppressed.
        core::mem::ManuallyDrop::new(self).0
    }
}

// Releases the wrapped module handle when a `Library` goes out of scope.
impl Drop for Library {
    fn drop(&mut self) {
        let handle = self.0.as_ptr();
        // The value returned by `FreeLibrary` is intentionally discarded.
        unsafe {
            FreeLibrary(handle);
        }
    }
}

Layout Optimizations of NonNull

ptr::NonNull<T> is defined as:

// Quoted from the standard library for illustration; the `rustc_*` attributes
// are compiler-internal and cannot be used in ordinary crates.
//
// `repr(transparent)`: the struct is laid out exactly like its single field.
#[repr(transparent)]
// Valid values start at 1, i.e. the all-zero (null) pattern is excluded.
#[rustc_layout_scalar_valid_range_start(1)]
// Going by its name, marks the null-pointer niche optimization as guaranteed.
#[rustc_nonnull_optimization_guaranteed]
pub struct NonNull<T: ?Sized> {
    pointer: *const T,
}

Discriminant Elision

NonNull<T> is like a *mut T, but with the added invariant that it is never null. In the context of layout optimizations, such an unused bit pattern is called a niche. Since niche values are disjoint from the values allowed by the validity invariant, the compiler can use one of them to represent None and elide the discriminant from the enum's memory layout [1]. The #[rustc_layout_scalar_valid_range_start(1)] and #[rustc_nonnull_optimization_guaranteed] attributes hint at this optimization.

Transparent Representation

Since NonNull<T> is marked as #[repr(transparent)], the layout and ABI of the struct are guaranteed to be the same as those of its only field, *const T, making it possible to transmute between a raw pointer to T and NonNull<T> (provided the pointer is not null).

In summary, ptr::NonNull<T: Sized> is guaranteed to be optimized such that:

  • NonNull<T> has the same size, alignment, and function call ABI as *mut T. [2]
  • Option<NonNull<T>> has the same size, alignment, and function call ABI as NonNull<T>. [3]
  • transmute::<_, Option<NonNull<T>>>([0u8; size_of::<NonNull<T>>()]) is sound and produces an Option::<NonNull<T>>::None. [3]

Wrapping GetProcAddress

In order to hide away the transmute from the user, we can create a new method get_proc on our Library struct:

/// First, naive version of the lookup: resolves `name` in this library and
/// reinterprets the resulting address as an `Option<P>`.
///
/// Nothing restricts `P`, and the returned value is not tied to the lifetime
/// of `self`.
pub fn get_proc<P>(&self, name: &str) -> Option<P> {
    // Panics if `name` contains an interior NUL byte.
    let name = CString::new(name).unwrap();
    let res = unsafe { GetProcAddress(self.0.as_ptr(), name.as_ptr()) };
    // Copies the bits of the raw address into an `Option<P>` for whatever `P`
    // the caller picked; no check ensures that `P` is a function pointer type.
    unsafe { core::mem::transmute_copy(&res) }
}

However, there are a few issues with this implementation:

  • We are using transmute_copy on an unrestricted generic parameter, which is very unsafe: nothing stops a caller from choosing a P that is not a function pointer at all.
  • Function pointer types behave like 'static references to code. When the Library gets dropped and FreeLibrary gets called, the function pointer we returned may dangle.

In order to address these issues, create a farproc module with a marker trait called FarProc and seal it:

/// Holds the sealing trait. Because this module is private, code outside the
/// enclosing module cannot name `SealedFarProc` and therefore cannot implement
/// `FarProc` for its own types.
mod private {
    /// Supertrait that seals `FarProc`; implementors must be `Copy`.
    pub trait SealedFarProc
    where
        Self: Copy,
    {
    }
}

/// Marker trait for function pointer types that may be resolved from a library.
pub trait FarProc
where
    Self: private::SealedFarProc,
{
}

Unfortunately, the FarProc trait needs to be implemented separately for function pointer types of every arity. To implement our trait recursively for n-ary functions, we can write an Incremental TT Muncher macro with an internal rule. This is the same way the Rust standard library implements traits for tuples.

macro_rules! farproc_impls {
    () => { 
        farproc_impls(@impl); 
    };
    ($T:ident $( , $U:ident )*) => {
        farproc_impls!($($U),*);
        farproc_impls!(@impl $T $( , $U)*);
    };
    (@impl $( $T:ident ),*) => {
        impl<Output, $($T),*> FarProc for unsafe extern "system" fn($($T),*) -> Output {}

        impl<'lib, Output, $($T),*> ExportedFn<'lib, unsafe extern "system" fn($($T),*) -> Output> {
            #[allow(non_snake_case)]
            #[inline(always)]
            pub unsafe fn call(&self, $($T: $T),*) -> Output {
                (self.1)($($T),*)
            }
        }
    };
}

farproc_impls!(A, B, C, D, E, F, G, H, I, J, K, L);

It should be noted that transmuting creates an unbounded lifetime, so the returned ExportedFn must be tied to the borrow of self through 'lib; otherwise the compiler will not complain when the Library gets dropped and the function gets used afterwards.

impl Library {
    pub fn get_proc<T: FarProc>(&'lib self, name: &str) -> Option<ExportedFn<'lib, F>> {
        let name = CString::new(name).unwrap();
        let res = unsafe { GetProcAddress(self.0.as_ptr(), name.as_ptr()) };
        res.map(|proc| unsafe { core::mem::transmute_copy(&proc) })
    }
}

Now, to address the issue of the function pointer dangling, create a new ExportedFn struct that will hold our function pointer. Note that the fields of the struct are not public, so the raw pointer can only be taken out through the unsafe accessors:

#[repr(transparent)]
pub struct ExportedFn<'lib, F: FarProc>(PhantomData<&'lib>, F);
impl<'lib, F: FarProc> ExportedFn<'lib, F> {
    /// Returns the inner function pointer, consuming self in the process.
    ///
    /// # Safety
    ///
    /// The function pointer may dangle if the Library it came from is dropped.
    pub unsafe fn into_raw(self) -> F {
        self.1
    }

    /// Returns the inner function pointer.
    ///
    /// # Safety
    ///
    /// The function pointer may dangle if the Library it came from is dropped.
    pub unsafe fn as_raw(&self) -> F {
        self.1
    }
}

We will now need some way to call the function. For this, we can reuse the macro we made for the FarProc trait:

macro_rules! farproc_impls {
    // other macro rules omitted for brevity

    // Internal rule: emits the impls for one concrete arity.
    (@impl $( $T:ident ),*) => {
        // `FarProc` is sealed, so the supertrait has to be implemented as well.
        impl<Output, $($T),*> private::SealedFarProc for unsafe extern "system" fn($($T),*) -> Output {}

        impl<Output, $($T),*> FarProc for unsafe extern "system" fn($($T),*) -> Output {}

        impl<'lib, Output, $($T),*> ExportedFn<'lib, unsafe extern "system" fn($($T),*) -> Output> {
            /// Calls the wrapped function pointer with the given arguments.
            ///
            /// # Safety
            ///
            /// The pointer must really refer to a function with this signature.
            // The type parameter names double as argument names, hence the `allow`.
            #[allow(non_snake_case)]
            #[inline(always)]
            pub unsafe fn call(&self, $($T: $T),*) -> Output {
                // SAFETY: forwarded to the caller, see the `# Safety` section.
                unsafe { (self.1)($($T),*) }
            }
        }
    };
}

Now in main:

use crate::{bindings::MessageBoxA, library::Library};

mod bindings;
mod library;
mod farproc;

/// Resolves `MessageBoxA` through the `Library` wrapper and tries to call it
/// after the library has been dropped.
fn main() {
    let library = Library::new("User32.dll").expect("Could not load User32.dll");
    let message_box = library
        .get_proc::<MessageBoxA>("MessageBoxA")
        .expect("Could not get MessageBoxA");

    // NOTE(review): presumably deliberate. `message_box` borrows `library`, so
    // moving `library` into `drop` while `message_box` is still used below
    // should be rejected by the borrow checker. Remove this line to get a
    // program that compiles and runs.
    drop(library);

    // NOTE(review): assumes `MessageBoxA` is declared like `MessageBoxW` but
    // with narrow-string parameters (HWND, LPCSTR, LPCSTR, UINT); confirm
    // against the bindings module.
    unsafe {
        message_box.call(
            // HWND is a pointer type, so "no owner window" is a null pointer.
            core::ptr::null_mut(),
            // `str::as_ptr` yields `*const u8`; cast to the C character type.
            "Hello, World!\0".as_ptr().cast(),
            "Hello, World!\0".as_ptr().cast(),
            0,
        );
    }
}

https://cocomelonc.github.io/malware/2023/04/16/malware-av-evasion-16.html