Canonicalize UE4 names into legal Rust tokens

This commit is contained in:
Janis 2023-04-18 20:48:19 +02:00
parent f12f48c83f
commit fad1effa05

View file

@ -4,7 +4,8 @@ pub mod types;
pub mod sdk { pub mod sdk {
use std::{ use std::{
collections::{hash_map::Entry, HashMap}, borrow::Cow,
collections::{hash_map::Entry, BTreeSet, HashMap, HashSet},
sync::Mutex, sync::Mutex,
}; };
@ -155,7 +156,9 @@ pub mod sdk {
UAnyType::UField(_) => {} UAnyType::UField(_) => {}
UAnyType::UScriptStruct(_) => {} UAnyType::UScriptStruct(_) => {}
UAnyType::UProperty(_) => {} UAnyType::UProperty(_) => {}
UAnyType::UEnum(obj) => Self::process_enum(obj), UAnyType::UEnum(obj) => {
let enm = Self::process_enum(obj)?;
}
UAnyType::UStruct(_) => {} UAnyType::UStruct(_) => {}
UAnyType::UFunction(_) => {} UAnyType::UFunction(_) => {}
} }
@ -164,15 +167,136 @@ pub mod sdk {
Ok(()) Ok(())
} }
fn process_enum(enm: UEnum) { fn process_enum(enm: UEnum) -> anyhow::Result<Enum> {
let names = enm let values = enm
.get_names() .get_names()
.iter() .iter()
.map(|name| name.get_name().unwrap_or("<invalid-name>".to_string())) .map(|name| name.get_name().unwrap_or("AnonymousVariant".to_string()))
.map(|name| canonicalize_name(&name).to_string())
.collect::<Vec<_>>(); .collect::<Vec<_>>();
log::info!("enum: {}", enm.as_uobject().get_full_name_or_default()); let name = enm
log::info!("{names:#?}"); .as_uobject()
.get_full_name()
.context("enum name could not be found")?;
let name = canonicalize_name(&name).to_string();
Ok(Enum { name, values })
} }
} }
/// Builds the set of words that must not be used verbatim as generated
/// identifiers: Rust keywords (including reserved ones) and the names of the
/// primitive types.
///
/// A fresh set is built on every call.
fn keywords() -> HashSet<&'static str> {
    // rust keywords
    const RUST_KEYWORDS: &[&str] = &[
        "as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn",
        "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref",
        "return", "self", "Self", "static", "struct", "super", "trait", "true", "type",
        "unsafe", "use", "where", "while", "async", "await", "dyn", "abstract", "become",
        "box", "do", "final", "macro", "override", "priv", "typeof", "unsized", "virtual",
        "yield", "try",
    ];
    // rust types
    const PRIMITIVE_TYPES: &[&str] = &[
        "bool", "f64", "f32", "str", "char", "u8", "u16", "u32", "u64", "u128", "i8", "i16",
        "i32", "i64", "i128", "usize", "isize",
    ];

    RUST_KEYWORDS
        .iter()
        .chain(PRIMITIVE_TYPES.iter())
        .copied()
        .collect()
}
/// Builds the set of characters that are replaced when a name is turned into
/// an identifier (see `split_at_illegal_char`).
///
/// A fresh set is built on every call.
fn token_chars() -> HashSet<char> {
    // Each character of this literal is one entry of the set.
    " ?+-:/^()[]<>&.#'\"%".chars().collect()
}
/// Outcome of splitting a string at its first illegal character, as produced
/// by `split_at_illegal_char`.
struct SplitResult<'a> {
    /// Text in front of the first illegal character; `None` when that text is
    /// empty.
    start: Option<&'a str>,
    /// Number of `_` characters `into_valid` writes between `start` and
    /// `end`; `0` when no illegal character was found.
    middle: usize,
    /// Text that follows the replaced character(s); `None` when nothing is
    /// left. It may still contain illegal characters, which `into_valid`
    /// handles by splitting it again.
    end: Option<&'a str>,
}
impl<'a> SplitResult<'a> {
    /// Returns `true` when the split found nothing to replace: there is a
    /// non-empty `start`, no replaced characters and no trailing text.
    pub fn is_valid(&self) -> bool {
        matches!((self.start, self.middle, self.end), (Some(_), 0, None))
    }

    /// Turns the split into a string with every illegal character replaced
    /// by `_`.
    ///
    /// The input is handed back borrowed when it needed no change; otherwise
    /// a new `String` is assembled from `start`, `middle` underscores and the
    /// recursively cleaned `end`.
    pub fn into_valid(self) -> Cow<'a, str> {
        // Nothing was replaced: hand the original slice back untouched.
        if let (Some(untouched), 0, None) = (self.start, self.middle, self.end) {
            return Cow::Borrowed(untouched);
        }

        let mut cleaned = String::from(self.start.unwrap_or(""));
        cleaned.push_str(&"_".repeat(self.middle));
        if let Some(tail) = self.end {
            // The tail may contain further illegal characters, so split it again.
            cleaned.push_str(&split_at_illegal_char(tail).into_valid());
        }
        Cow::Owned(cleaned)
    }
}
/// Maps an empty string to `None` and any other string to `Some` of itself.
fn empty_or_some(s: &str) -> Option<&str> {
    Some(s).filter(|text| !text.is_empty())
}
/// Splits `input` around its first run of illegal characters (the set
/// returned by `token_chars`).
///
/// * No illegal character: `start` is the whole input (or `None` if it is
///   empty), `middle` is 0 and `end` is `None`.
/// * Otherwise `start` is the text before the first illegal character and
///   `middle` counts the characters to replace. When a legal character
///   follows, `middle` covers the whole run and `end` starts at that legal
///   character. When none follows, only one character is counted and `end`
///   is whatever remains after it.
fn split_at_illegal_char(input: &str) -> SplitResult {
    let forbidden = token_chars();

    let first_bad = match input.find(|ch| forbidden.contains(&ch)) {
        Some(idx) => idx,
        None => {
            return SplitResult {
                start: empty_or_some(input),
                middle: 0,
                end: None,
            };
        }
    };

    let (head, tail) = input.split_at(first_bad);
    // skip the illegal char (slicing at +1 relies on it being a single byte)
    let rest = &tail[1..];

    let (middle, end) = match rest.find(|ch| !forbidden.contains(&ch)) {
        // `next_ok` more illegal chars follow the one already skipped.
        Some(next_ok) => (next_ok + 1, &rest[next_ok..]),
        // Only illegal chars (or nothing) remain: count one, leave the rest
        // for the next split.
        None => (1, rest),
    };

    SplitResult {
        start: empty_or_some(head),
        middle,
        end: empty_or_some(end),
    }
}
/// Converts an arbitrary name into a string usable as a Rust identifier.
///
/// Every character listed by `token_chars` is replaced with `_`. The result
/// is then prefixed with `_` when it would otherwise be rejected as an
/// identifier because it
///
/// * equals a Rust keyword or primitive type name (see `keywords`), or
/// * starts with an ASCII digit.
///
/// The input is returned borrowed when no change was needed. An empty input
/// yields an empty string.
fn canonicalize_name(name: &str) -> Cow<str> {
    let valid = split_at_illegal_char(name).into_valid();
    // Identifiers may contain digits but must not begin with one.
    let starts_with_digit = valid.starts_with(|c: char| c.is_ascii_digit());
    if starts_with_digit || keywords().contains(valid.as_ref()) {
        Cow::Owned(format!("_{}", valid))
    } else {
        valid
    }
}
/// An enum described purely by strings: its own name and the names of its
/// values.
#[derive(Debug)]
struct Enum {
    /// Name of the enum.
    name: String,
    /// Names of the enum's values.
    values: Vec<String>,
}
} }