diff --git a/src/mir.rs b/src/mir.rs
index cd6d47e..86aaf00 100644
--- a/src/mir.rs
+++ b/src/mir.rs
@@ -1,5 +1,6 @@
 //! Machine-level Intermediate Representation
 
+use std::collections::btree_map::Entry;
 use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
 
 use itertools::Itertools;
@@ -108,6 +109,8 @@ pub enum Inst {
     ConstantDWord,
     /// imm64
     ConstantQWord,
+    /// src
+    LoadConstant(Type), // hint for loading constant into register
     /// ast-node
     ExternRef,
     /// size, align
@@ -156,6 +159,40 @@ pub enum Inst {
 }
 
 impl Inst {
+    fn value_type(&self) -> Option<Type> {
+        match self {
+            Inst::Label
+            | Inst::ConstantBytes
+            | Inst::ConstantByte
+            | Inst::ConstantWord
+            | Inst::ConstantDWord
+            | Inst::ConstantQWord
+            | Inst::ExternRef
+            | Inst::Alloca
+            | Inst::ReturnValue
+            | Inst::Store(_)
+            | Inst::Return => None,
+            Inst::GetElementPtr(ty)
+            | Inst::Load(ty)
+            | Inst::LoadConstant(ty)
+            | Inst::Parameter(ty)
+            | Inst::Add(ty)
+            | Inst::Sub(ty)
+            | Inst::Mul(ty)
+            | Inst::MulSigned(ty)
+            | Inst::Div(ty)
+            | Inst::DivSigned(ty)
+            | Inst::Rem(ty)
+            | Inst::RemSigned(ty)
+            | Inst::BitAnd(ty)
+            | Inst::BitOr(ty)
+            | Inst::BitXOr(ty)
+            | Inst::Negate(ty)
+            | Inst::ShiftLeft(ty)
+            | Inst::ShiftRightSigned(ty)
+            | Inst::ShiftRightUnsigned(ty) => Some(*ty),
+        }
+    }
     fn has_value(&self) -> bool {
         // basically, when an arithmetic instruction has two immediates, then just replace it with a mov into the dst reg
         match self {
@@ -172,6 +209,7 @@ impl Inst {
             | Inst::Return => false,
             Inst::GetElementPtr(_)
             | Inst::Load(_)
+            | Inst::LoadConstant(_)
             | Inst::Parameter(_)
             | Inst::Add(_)
             | Inst::Sub(_)
@@ -307,6 +345,9 @@ impl Mir {
     pub fn gen_u64(&mut self, value: u64) -> u32 {
         self.push(Inst::ConstantQWord, Data::imm64(value))
     }
+    pub fn gen_load_const(&mut self, ty: Type, src: u32) -> u32 {
+        self.push(Inst::LoadConstant(ty), Data::node(src))
+    }
 
     pub fn gen_label(&mut self, name: StringsIndex) -> u32 {
         self.push(Inst::Label, Data::index(name))
@@ -439,7 +480,7 @@ impl Mir {
         &self,
         w: &mut W,
         strings: &StringTable,
-        reg_alloc: &HashMap<u32, amd64::Register>,
+        reg_alloc: &BTreeMap<u32, amd64::Register>,
         node: u32,
     ) -> core::fmt::Result {
         let idx = node as usize;
@@ -461,6 +502,10 @@ impl Mir {
             Inst::ConstantWord => writeln!(w, "%{node} = imm16({:x?})", data.as_imm16()),
             Inst::ConstantDWord => writeln!(w, "%{node} = imm32({:x?})", data.as_imm32()),
             Inst::ConstantQWord => writeln!(w, "%{node} = imm64({:x?})", data.as_imm64()),
+            Inst::LoadConstant(ty) => {
+                let src = data.as_node();
+                writeln!(w, "%{node} = load constant {ty} %{src}")
+            }
             Inst::ExternRef => writeln!(w, "%{node} = extern %%{}", data.as_node()),
             Inst::Alloca => {
                 let (size, align) = data.as_binary();
@@ -568,19 +613,18 @@ impl Mir {
 }
 
 impl Mir {
-    pub fn build_liveness(&self) -> HashMap<u32, amd64::Register> {
+    pub fn build_liveness(&self) -> BTreeMap<u32, amd64::Register> {
         struct Interval {
             start: u32,
             end: u32,
         }
 
         let mut references = BTreeMap::<u32, Vec<u32>>::new();
-        let mut prefered_colors = BTreeMap::<u32, amd64::Register>::new();
+        let mut preferred_colors = BTreeMap::<u32, amd64::Register>::new();
         use amd64::Register::*;
-        let mut param_registers = [rsi, rdi, rdx, rcx, r8, r9]
-            .into_iter()
-            .rev()
-            .collect::<Vec<_>>();
+        let mut in_colors = [r9, r8, rcx, rdx, rdi, rsi].to_vec();
+        let mut in_colors_sse = [xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0].to_vec();
+        let mut inouts = Vec::<u32>::new();
 
         for i in 0..self.nodes.len() {
             let inst = self.nodes[i];
@@ -590,20 +634,26 @@ impl Mir {
             references.insert(node, Vec::new());
 
             match inst {
-                Inst::Parameter(_) => {
-                    if let Some(reg) = param_registers.pop() {
+                Inst::Parameter(ty) => {
+                    if let Some(reg) = if ty.is_floating() {
+                        in_colors_sse.pop()
+                    } else {
+                        in_colors.pop()
+                    } {
                         println!("prefering {reg} for param");
-                        prefered_colors.insert(node, reg);
-                    }
-                }
-                Inst::Negate(_) | Inst::Load(_) => {
-                    references.get_mut(&data.as_node()).unwrap().push(node);
+                        preferred_colors.insert(node, reg);
+                    };
+                    inouts.push(node);
                 }
                 // return is thru rax.
                 Inst::ReturnValue => {
                     let val = data.as_node();
+                    inouts.push(val);
                     references.get_mut(&val).unwrap().push(node);
-                    _ = prefered_colors.try_insert(val, amd64::Register::rax);
+                    _ = preferred_colors.try_insert(val, amd64::Register::rax);
+                }
+                Inst::Negate(_) | Inst::Load(_) => {
+                    references.get_mut(&data.as_node()).unwrap().push(node);
                 }
                 Inst::GetElementPtr(_) => {
                     let (src, _) = data.as_binary();
@@ -617,24 +667,38 @@ impl Mir {
                     references.get_mut(&rhs).unwrap().push(node);
 
                     if !ty.is_floating() {
-                        _ = prefered_colors.try_insert(lhs, amd64::Register::rax);
-                        _ = prefered_colors.try_insert(rhs, amd64::Register::rax);
+                        _ = preferred_colors.try_insert(lhs, amd64::Register::rax);
+                        _ = preferred_colors.try_insert(rhs, amd64::Register::rax);
                     }
                 }
-                // mul wants lhs to be rax, imul can do either.
+                // div and idiv both take the dividend (lhs) implicitly in rax.
                 // note that it also clobers rdx
-                Inst::Div(_) | Inst::DivSigned(_) | Inst::Rem(_) | Inst::RemSigned(_) => {
+                Inst::Div(ty) | Inst::DivSigned(ty) => {
                     let (lhs, rhs) = data.as_binary();
                     references.get_mut(&lhs).unwrap().push(node);
                     references.get_mut(&rhs).unwrap().push(node);
-                    _ = prefered_colors.try_insert(lhs, amd64::Register::rax);
+
+                    if !ty.is_floating() {
+                        _ = preferred_colors.try_insert(lhs, amd64::Register::rax);
+                    }
+                }
+                // rem goes through div/idiv, which leave the remainder in rdx.
+                // note that it also clobbers rax
+                Inst::Rem(ty) | Inst::RemSigned(ty) => {
+                    let (lhs, rhs) = data.as_binary();
+                    references.get_mut(&lhs).unwrap().push(node);
+                    references.get_mut(&rhs).unwrap().push(node);
+
+                    if !ty.is_floating() {
+                        _ = preferred_colors.try_insert(lhs, amd64::Register::rdx);
+                    }
                 }
                 // shr,shl,sar want the shift to be in cl.
                 Inst::ShiftLeft(_) | Inst::ShiftRightSigned(_) | Inst::ShiftRightUnsigned(_) => {
                     let (lhs, rhs) = data.as_binary();
                     references.get_mut(&lhs).unwrap().push(node);
                     references.get_mut(&rhs).unwrap().push(node);
-                    _ = prefered_colors.try_insert(rhs, amd64::Register::rcx);
+                    _ = preferred_colors.try_insert(rhs, amd64::Register::rcx);
                 }
                 // add,adc,sub,sbb,or,and,xor and mov don't care much about their source registers
                 Inst::Add(_)
@@ -652,7 +716,7 @@ impl Mir {
         }
 
         references.retain(|&node, refs| !refs.is_empty() && self.nodes[node as usize].has_value());
-        prefered_colors.retain(|&node, _| self.nodes[node as usize].has_value());
+        preferred_colors.retain(|&node, _| self.nodes[node as usize].has_value());
 
         let intervals = references
             .iter()
@@ -675,40 +739,153 @@ impl Mir {
         }
         let inference_graph = petgraph::graph::UnGraph::<(), ()>::from_edges(edges.into_iter());
 
-        let gprs = amd64::Register::gp_registers();
-        let sses = amd64::Register::sse_registers();
+        #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+        enum Color {
+            #[default]
+            Unassigned,
+            Tentative(amd64::Register),
+            Final(amd64::Register),
+        }
 
-        let mut assigned_colors = HashMap::<u32, amd64::Register>::new();
-        for &node in references.keys().rev() {
-            if matches!(self.nodes[node as usize], Inst::Alloca) {
-                continue;
+        impl Color {
+            fn color(self) -> Option<amd64::Register> {
+                match self {
+                    Color::Unassigned => None,
+                    Color::Tentative(color) | Color::Final(color) => Some(color),
+                }
+            }
+        }
+
+        struct Colorizer<'a> {
+            mir: &'a Mir,
+            graph: petgraph::graph::UnGraph<(), ()>,
+            colors: BTreeMap<u32, Color>,
+            preferred: BTreeMap<u32, amd64::Register>,
+        }
+
+        impl<'a> Colorizer<'a> {
+            fn node_colors(&self, node: petgraph::graph::NodeIndex) -> &[amd64::Register] {
+                let colors = if self.mir.nodes[node.index()]
+                    .value_type()
+                    .map(|t| t.is_floating())
+                    == Some(true)
+                {
+                    &amd64::Register::SSE[..]
+                } else {
+                    &amd64::Register::GPR[..]
+                };
+                colors
             }
 
-            let clique_colors = inference_graph
-                .neighbors(node.into())
-                .filter_map(|e| assigned_colors.get(&(e.index() as u32)).cloned())
-                .collect::<BTreeSet<_>>();
-            let clique_preferred_colors = inference_graph
-                .neighbors(node.into())
-                .filter_map(|e| prefered_colors.get(&(e.index() as u32)).cloned())
-                .collect::<BTreeSet<_>>();
+            fn prepass<I: IntoIterator<Item = u32>>(&mut self, inouts: I) {
+                for node in inouts.into_iter() {
+                    self.precolor_node(node.into());
+                }
 
-            let color = prefered_colors
-                .get(&node)
-                .into_iter()
-                .chain(gprs.iter())
-                .filter(|reg| !clique_colors.contains(reg))
-                .find_or_first(|&reg| !clique_preferred_colors.contains(reg))
-                .cloned()
-                .expect("ran out of registers");
+                let keys = self.preferred.keys().cloned().collect::<Vec<_>>();
+                for node in keys {
+                    self.precolor_node(node.into());
+                }
+            }
 
-            println!("%{node} wants {:?}\n\tclique: {clique_colors:?}\n\tclique prefs: {clique_preferred_colors:?}\n\t-> {color}", prefered_colors.get(&node));
+            fn precolor_node(&mut self, node: petgraph::graph::NodeIndex) {
+                // prepass: assign preferred colours for in/out values and specific
+                // instructions like mul/div which require one operand to be in rax
+                let node_u32 = node.index() as u32;
+                // only apply color here if we have a preference
+                if let Some(preferred_color) = self.preferred.remove(&node_u32) {
+                    let mut clique_colors = self
+                        .graph
+                        .neighbors(node)
+                        .filter_map(|n| self.colors.get(&(n.index() as u32)).cloned());
 
-            prefered_colors.remove(&node);
+                    if clique_colors
+                        .find(|color| color.color() == Some(preferred_color))
+                        .is_none()
+                    {
+                        self.colors
+                            .insert(node_u32, Color::Tentative(preferred_color));
+                    }
+                };
+                // .chain(self.node_colors(node).into_iter().cloned());
+            }
 
-            assigned_colors.insert(node, color);
+            fn color_node(&mut self, node: petgraph::graph::NodeIndex) {
+                // final pass:
+                // look at clique colors and prefer to steal colors from
+                // tentatively colored nodes. this results in preferential
+                // coloring depending on the order of the prepass.
+                let node_u32 = node.index() as u32;
+                let clique_colors = self
+                    .graph
+                    .neighbors(node)
+                    .filter_map(|n| self.colors.get(&(n.index() as u32)).cloned())
+                    .collect::<BTreeSet<_>>();
+
+                let colors = self
+                    .node_colors(node)
+                    .into_iter()
+                    .filter(|&&r| !clique_colors.contains(&Color::Final(r)))
+                    .cloned()
+                    .collect::<BTreeSet<_>>();
+
+                // eprintln!("coloring %{node_u32}:");
+                // eprintln!("\twants: {:?}", self.colors.get(&node_u32));
+                // eprintln!("\tclique: {clique_colors:?}");
+                // eprintln!("\tcandidates: {colors:?}");
+
+                match self.colors.entry(node_u32) {
+                    Entry::Vacant(v) => {
+                        // here we want to first check clique_colors with tentative coloring.
+                        let color = colors
+                            .into_iter()
+                            .find_or_first(|&c| !clique_colors.contains(&Color::Tentative(c)))
+                            .expect("ran out of registers :(");
+                        v.insert(Color::Final(color));
+                    }
+                    Entry::Occupied(mut e) => {
+                        // we prefer to steal
+                        variant!(e.get() => &Color::Tentative(reg));
+                        let color = colors
+                            .into_iter()
+                            .find_or_first(|&c| c == reg)
+                            .expect("ran out of registers :(");
+                        e.insert(Color::Final(color));
+                    }
+                }
+            }
+
+            fn finalise(self) -> BTreeMap<u32, amd64::Register> {
+                self.colors
+                    .into_iter()
+                    .filter_map(|(node, c)| match c {
+                        Color::Final(reg) => Some((node, reg)),
+                        _ => None,
+                    })
+                    .collect()
+            }
         }
 
+        let mut colorizer = Colorizer {
+            mir: self,
+            graph: inference_graph,
+            preferred: preferred_colors,
+            colors: BTreeMap::new(),
+        };
+
+        // prepass: assign preferred colours for in/out values and specific
+        // instructions like mul/div which require one operand to be in rax
+        colorizer.prepass(inouts);
+
+        for &node in references.keys().rev() {
+            if !self.nodes[node as usize].has_value() {
+                continue;
+            }
+            colorizer.color_node(node.into());
+        }
+
+        let colors = colorizer.finalise();
+
         // eprintln!(
         //     "Inference Graph:\n{:?}",
         //     petgraph::dot::Dot::with_attr_getters(
@@ -729,9 +906,10 @@ impl Mir {
         //     ),
         // );
 
-        assigned_colors
+        colors
     }
 }
+
 pub struct DisplayMir<'a, 'b> {
     mir: &'a Mir,
     strings: &'b StringTable,