// Reconstructed from a `git format-patch` mail that had been collapsed onto
// three lines:
//   commit  fad1effa05443bb96bb88863fdf0d8093339ea0d
//   author  Janis
//   date    Tue, 18 Apr 2023 20:48:19 +0200
//   subject canonicalize UE4 name into legal rust token
// The patch changes `src/lib.rs`; in that file every item below sits inside
// `pub mod sdk`.

// `use` list of `pub mod sdk` as it stands after the patch (the patch adds
// `Cow`, `BTreeSet` and `HashSet`). Names not referenced below are presumably
// used by parts of the module that the patch does not show.
use std::{
    borrow::Cow,
    collections::{hash_map::Entry, BTreeSet, HashMap, HashSet},
    sync::Mutex,
};

// Call sites changed by the same patch. They live inside an `impl` block whose
// header is outside the visible hunks, so they are recorded here for reference
// instead of being re-emitted as items. Generic arguments were lost in the
// collapsed text and are restored from the surrounding code -- confirm against
// the repository.
//
//     UAnyType::UEnum(obj) => {
//         let enm = Self::process_enum(obj)?;
//     }
//
//     fn process_enum(enm: UEnum) -> anyhow::Result<Enum> {
//         let values = enm
//             .get_names()
//             .iter()
//             .map(|name| name.get_name().unwrap_or("AnonymousVariant".to_string()))
//             .map(|name| canonicalize_name(&name).to_string())
//             .collect::<Vec<_>>();
//
//         let name = enm
//             .as_uobject()
//             .get_full_name()
//             .context("enum name could not be found")?;
//         let name = canonicalize_name(&name).to_string();
//
//         Ok(Enum { name, values })
//     }
//
// NOTE(review): `enm` is bound but never used in the match arm, and
// `unwrap_or("AnonymousVariant".to_string())` allocates even when a name is
// present (`unwrap_or_else` would not).

/// Words that must not be emitted verbatim as an identifier.
///
/// Kept as a constant table so that lookups do not have to build a `HashSet`
/// for every name that is canonicalized.
const RESERVED_WORDS: &[&str] = &[
    // rust keywords
    "as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn",
    "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref",
    "return", "self", "Self", "static", "struct", "super", "trait", "true", "type",
    "unsafe", "use", "where", "while", "async", "await", "dyn", "abstract", "become",
    "box", "do", "final", "macro", "override", "priv", "typeof", "unsized", "virtual",
    "yield", "try",
    // rust types
    "bool", "f64", "f32", "str", "char", "u8", "u16", "u32", "u64", "u128", "i8", "i16",
    "i32", "i64", "i128", "usize", "isize",
];

/// Characters that are replaced by `_` when a name is canonicalized.
///
/// All of them are ASCII, so inside a run of these characters a byte offset
/// and a character count are the same number.
const ILLEGAL_CHARS: &[char] = &[
    ' ', '?', '+', '-', ':', '/', '^', '(', ')', '[', ']', '<', '>', '&', '.', '#', '\'',
    '"', '%',
];

/// Returns the set of reserved words (Rust keywords and primitive type names).
fn keywords() -> HashSet<&'static str> {
    RESERVED_WORDS.iter().copied().collect()
}

/// Returns the set of characters that are not allowed in a canonical name.
fn token_chars() -> HashSet<char> {
    ILLEGAL_CHARS.iter().copied().collect()
}

/// One step of scanning a name for illegal characters.
#[derive(Debug)]
struct SplitResult<'a> {
    /// Text before the first illegal character; `None` when that text is empty.
    start: Option<&'a str>,
    /// Length of the run of consecutive illegal characters that follows `start`.
    middle: usize,
    /// Text after the run, starting with a legal character; `None` when
    /// nothing follows.
    end: Option<&'a str>,
}

impl<'a> SplitResult<'a> {
    /// `true` when the scanned input was non-empty and had no illegal character.
    pub fn is_valid(&self) -> bool {
        self.start.is_some() && self.middle == 0 && self.end.is_none()
    }

    /// Rebuilds the name with every illegal character replaced by `_`.
    ///
    /// Borrows the input when nothing had to be replaced and allocates
    /// otherwise. The remainder is processed in a loop rather than by
    /// recursion, so the stack depth does not grow with the number of illegal
    /// runs in the name.
    pub fn into_valid(self) -> Cow<'a, str> {
        if let (Some(start), true) = (self.start, self.is_valid()) {
            return Cow::Borrowed(start);
        }

        let mut valid = String::new();
        let mut segment = self;
        loop {
            if let Some(start) = segment.start {
                valid.push_str(start);
            }
            valid.extend(std::iter::repeat('_').take(segment.middle));

            match segment.end {
                Some(rest) => segment = split_at_illegal_char(rest),
                None => return Cow::Owned(valid),
            }
        }
    }
}

/// Maps the empty string to `None` and any other string to `Some`.
fn empty_or_some(s: &str) -> Option<&str> {
    Some(s).filter(|s| !s.is_empty())
}

/// Splits `input` at its first run of illegal characters.
///
/// A run at the very end of `input` is reported in full through `middle`, with
/// `end` set to `None`.
fn split_at_illegal_char(input: &str) -> SplitResult<'_> {
    let pos = match input.find(ILLEGAL_CHARS) {
        Some(pos) => pos,
        None => {
            return SplitResult {
                start: empty_or_some(input),
                middle: 0,
                end: None,
            }
        }
    };

    // `tail` begins with the first illegal character.
    let (start, tail) = input.split_at(pos);
    let start = empty_or_some(start);

    match tail.find(|c: char| !ILLEGAL_CHARS.contains(&c)) {
        // The byte offset of the first legal character equals the length of
        // the run because every illegal character is one byte wide.
        Some(run_len) => SplitResult {
            start,
            middle: run_len,
            end: Some(&tail[run_len..]),
        },
        None => SplitResult {
            start,
            middle: tail.len(),
            end: None,
        },
    }
}

/// Turns an arbitrary name into text that can be used as a Rust identifier.
///
/// * Every character listed in [`ILLEGAL_CHARS`] becomes `_`.
/// * A result that is a reserved word (see [`keywords`]) or that starts with an
///   ASCII digit is prefixed with `_`.
/// * A result that is empty or a lone `_` (neither is an identifier) becomes
///   `__`.
///
/// The input is returned borrowed when it is already acceptable. Characters
/// outside [`ILLEGAL_CHARS`] are passed through unchanged, so a name containing
/// some other non-identifier character is not repaired.
fn canonicalize_name(name: &str) -> Cow<'_, str> {
    let valid = split_at_illegal_char(name).into_valid();

    if valid.is_empty() || valid == "_" {
        return Cow::Borrowed("__");
    }

    let needs_prefix = {
        let ident: &str = &valid;
        ident.starts_with(|c: char| c.is_ascii_digit())
            || RESERVED_WORDS.iter().any(|&word| word == ident)
    };

    if needs_prefix {
        Cow::Owned(format!("_{valid}"))
    } else {
        valid
    }
}

/// An enum as produced by `process_enum`: its canonicalized name and the
/// canonicalized names of its variants.
#[derive(Debug)]
struct Enum {
    name: String,
    values: Vec<String>,
}