canonicalize UE4 name into legal rust token
This commit is contained in:
		
							parent
							
								
									f12f48c83f
								
							
						
					
					
						commit
						fad1effa05
					
				
							
								
								
									
										138
									
								
								src/lib.rs
									
									
									
									
									
								
							
							
						
						
									
										138
									
								
								src/lib.rs
									
									
									
									
									
								
							|  | @ -4,7 +4,8 @@ pub mod types; | |||
| 
 | ||||
| pub mod sdk { | ||||
|     use std::{ | ||||
|         collections::{hash_map::Entry, HashMap}, | ||||
|         borrow::Cow, | ||||
|         collections::{hash_map::Entry, BTreeSet, HashMap, HashSet}, | ||||
|         sync::Mutex, | ||||
|     }; | ||||
| 
 | ||||
|  | @ -155,7 +156,9 @@ pub mod sdk { | |||
|                     UAnyType::UField(_) => {} | ||||
|                     UAnyType::UScriptStruct(_) => {} | ||||
|                     UAnyType::UProperty(_) => {} | ||||
|                     UAnyType::UEnum(obj) => Self::process_enum(obj), | ||||
|                     UAnyType::UEnum(obj) => { | ||||
|                         let enm = Self::process_enum(obj)?; | ||||
|                     } | ||||
|                     UAnyType::UStruct(_) => {} | ||||
|                     UAnyType::UFunction(_) => {} | ||||
|                 } | ||||
|  | @ -164,15 +167,136 @@ pub mod sdk { | |||
|             Ok(()) | ||||
|         } | ||||
| 
 | ||||
|         fn process_enum(enm: UEnum) { | ||||
|             let names = enm | ||||
|         fn process_enum(enm: UEnum) -> anyhow::Result<Enum> { | ||||
|             let values = enm | ||||
|                 .get_names() | ||||
|                 .iter() | ||||
|                 .map(|name| name.get_name().unwrap_or("<invalid-name>".to_string())) | ||||
|                 .map(|name| name.get_name().unwrap_or("AnonymousVariant".to_string())) | ||||
|                 .map(|name| canonicalize_name(&name).to_string()) | ||||
|                 .collect::<Vec<_>>(); | ||||
| 
 | ||||
|             log::info!("enum: {}", enm.as_uobject().get_full_name_or_default()); | ||||
|             log::info!("{names:#?}"); | ||||
|             let name = enm | ||||
|                 .as_uobject() | ||||
|                 .get_full_name() | ||||
|                 .context("enum name could not be found")?; | ||||
|             let name = canonicalize_name(&name).to_string(); | ||||
| 
 | ||||
|             Ok(Enum { name, values }) | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn keywords() -> HashSet<&'static str> { | ||||
|         let mut keywords = HashSet::new(); | ||||
| 
 | ||||
|         // rust keywords
 | ||||
|         keywords.extend([ | ||||
|             "as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn", | ||||
|             "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref", | ||||
|             "return", "self", "Self", "static", "struct", "super", "trait", "true", "type", | ||||
|             "unsafe", "use", "where", "while", "async", "await", "dyn", "abstract", "become", | ||||
|             "box", "do", "final", "macro", "override", "priv", "typeof", "unsized", "virtual", | ||||
|             "yield", "try", | ||||
|         ]); | ||||
| 
 | ||||
|         // rust types
 | ||||
|         keywords.extend([ | ||||
|             "bool", "f64", "f32", "str", "char", "u8", "u16", "u32", "u64", "u128", "i8", "i16", | ||||
|             "i32", "i64", "i128", "usize", "isize", | ||||
|         ]); | ||||
| 
 | ||||
|         keywords | ||||
|     } | ||||
| 
 | ||||
|     fn token_chars() -> HashSet<char> { | ||||
|         let mut chars = HashSet::new(); | ||||
| 
 | ||||
|         chars.extend([ | ||||
|             ' ', '?', '+', '-', ':', '/', '^', '(', ')', '[', ']', '<', '>', '&', '.', '#', '\'', | ||||
|             '"', '%', | ||||
|         ]); | ||||
| 
 | ||||
|         chars | ||||
|     } | ||||
| 
 | ||||
|     struct SplitResult<'a> { | ||||
|         start: Option<&'a str>, | ||||
|         middle: usize, | ||||
|         end: Option<&'a str>, | ||||
|     } | ||||
| 
 | ||||
|     impl<'a> SplitResult<'a> { | ||||
|         pub fn is_valid(&self) -> bool { | ||||
|             self.start.is_some() && self.middle == 0 && self.end.is_none() | ||||
|         } | ||||
| 
 | ||||
|         pub fn into_valid(self) -> Cow<'a, str> { | ||||
|             if self.is_valid() { | ||||
|                 Cow::Borrowed(self.start.unwrap()) | ||||
|             } else { | ||||
|                 let mut valid = self.start.map(|s| s.to_string()).unwrap_or_default(); | ||||
|                 valid.extend(core::iter::repeat('_').take(self.middle)); | ||||
| 
 | ||||
|                 match self.end { | ||||
|                     Some(end) => { | ||||
|                         valid.push_str(&split_at_illegal_char(end).into_valid()); | ||||
|                     } | ||||
|                     None => {} | ||||
|                 } | ||||
| 
 | ||||
|                 Cow::Owned(valid) | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn empty_or_some(s: &str) -> Option<&str> { | ||||
|         if s.is_empty() { | ||||
|             None | ||||
|         } else { | ||||
|             Some(s) | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn split_at_illegal_char(input: &str) -> SplitResult { | ||||
|         let illegal_chars = token_chars(); | ||||
|         if let Some(pos) = input.find(|c| illegal_chars.contains(&c)) { | ||||
|             let start = empty_or_some(&input[..pos]); | ||||
|             // skip the illegal char
 | ||||
|             let rest = &input[pos + 1..]; | ||||
| 
 | ||||
|             if let Some(pos2) = rest.find(|c| !illegal_chars.contains(&c)) { | ||||
|                 SplitResult { | ||||
|                     start, | ||||
|                     middle: pos2 + 1, | ||||
|                     end: empty_or_some(&rest[pos2..]), | ||||
|                 } | ||||
|             } else { | ||||
|                 SplitResult { | ||||
|                     start, | ||||
|                     middle: 1, | ||||
|                     end: empty_or_some(rest), | ||||
|                 } | ||||
|             } | ||||
|         } else { | ||||
|             SplitResult { | ||||
|                 start: empty_or_some(input), | ||||
|                 middle: 0, | ||||
|                 end: None, | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     fn canonicalize_name(name: &str) -> Cow<str> { | ||||
|         let valid = split_at_illegal_char(name).into_valid(); | ||||
|         if keywords().contains(valid.as_ref()) { | ||||
|             Cow::Owned(format!("_{}", &valid)) | ||||
|         } else { | ||||
|             valid | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     #[derive(Debug)] | ||||
|     struct Enum { | ||||
|         name: String, | ||||
|         values: Vec<String>, | ||||
|     } | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue