# Copyright (c) Meta Platforms, Inc. and affiliates. # # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. """ Functions for resolving text content from Doxygen XML types. """ from __future__ import annotations import re from enum import Enum from doxmlparser import compound # Doxygen's encoding for special characters in refids, ordered longest-first # to avoid partial matches during replacement. _DOXYGEN_TEMPLATE_ENCODINGS = ( ("_8_8_8", "..."), # Variadic ellipsis ("_00", ", "), # Comma (with space for readability) ("_01", " "), # Space ("_02", "*"), # Pointer ("_05", "="), # Equals ("_06", "&"), # Reference ("_07", "("), # Open paren ("_08", ")"), # Close paren ("_3", "<"), # Template open ("_4", ">"), # Template close ) def decode_doxygen_template_encoding(encoded: str) -> str: """Decode Doxygen's encoding for template specializations in refids. Doxygen encodes special characters in refids using underscore-prefixed codes. e.g. 'SyncCallback_3_01R_07Args_8_8_8_08_4' -> 'SyncCallback< R(Args...)>' """ result = encoded for pattern, replacement in _DOXYGEN_TEMPLATE_ENCODINGS: result = result.replace(pattern, replacement) return result def extract_namespace_from_refid(refid: str) -> str: """Extract the namespace prefix from a doxygen refid. e.g. 'namespacefacebook_1_1yoga_1a...' -> 'facebook::yoga' 'structfacebook_1_1react_1_1detail_1_1is__dynamic' -> 'facebook::react::detail::is_dynamic' 'classfacebook_1_1react_1_1SyncCallback_3_01R_07Args_8_8_8_08_4' -> 'facebook::react::SyncCallback< R(Args...)>' Doxygen encoding: - '::' is encoded as '_1_1' - '_' in identifiers is encoded as '__' (double underscore) - Template specializations are encoded with hex-like codes (see decode_doxygen_template_encoding) """ for prefix in ("namespace", "struct", "class", "union"): if refid.startswith(prefix): compound_part = refid[len(prefix) :] idx = compound_part.find("_1a") if idx != -1: compound_part = compound_part[:idx] # First replace '::' encoding (_1_1 -> ::) result = compound_part.replace("_1_1", "::") # Then replace double underscore with single underscore # (Doxygen encodes '_' in identifiers as '__') result = result.replace("__", "_") # Decode template specialization encodings result = decode_doxygen_template_encoding(result) return result return "" def normalize_angle_brackets(text: str) -> str: """Doxygen adds spaces around < and > to avoid XML ambiguity. e.g. "NSArray< id< RCTBridgeMethod > > *" -> "NSArray>*" """ text = re.sub(r"<\s+", "<", text) text = re.sub(r"\s+>", ">", text) return text def normalize_nullability(text: str) -> str: """Normalize Objective-C nullability annotations to a consistent form. There are three forms of nullability annotations in Objective-C: - nonnull/nullable (context-sensitive keywords, typically used as prefix) - _Nonnull/_Nullable (type qualifiers, can appear after the type) - __nonnull/__nullable (legacy Apple macros, deprecated but still used) This function normalizes all forms to _Nonnull/_Nullable, which is the most flexible form that can be used in any position. """ # Normalize __nonnull -> _Nonnull (must come before nonnull) text = re.sub(r"\b__nonnull\b", "_Nonnull", text) # Normalize nonnull -> _Nonnull (negative lookbehind to avoid _Nonnull) text = re.sub(r"(? _Nullable (must come before nullable) text = re.sub(r"\b__nullable\b", "_Nullable", text) # Normalize nullable -> _Nullable (negative lookbehind to avoid _Nullable) text = re.sub(r"(? str: """Normalize spacing around pointer (*) and reference (&, &&) symbols. Doxygen outputs types with a space before * and &, e.g.: - "NSString *" -> "NSString*" - "int &" -> "int&" - "T &&" -> "T&&" But for function arguments like "NSString *name", we want "NSString* name" (space moved from before * to after *). This normalizes to have no space before the pointer/reference symbol, while preserving the space after if there's an identifier following. """ # For patterns like "Type *name" -> "Type* name" (move space from before to after) # Match: word/type character or > followed by space(s), then *, then word char text = re.sub(r"(\w|>)\s+\*(\w)", r"\1* \2", text) # For patterns like "Type *" at end or before comma/paren/angle/open-paren -> "Type*" text = re.sub(r"(\w|>)\s+\*(?=\s*[,)>(]|$)", r"\1*", text) # For patterns like "Type *" followed by another * (pointer to pointer) text = re.sub(r"(\w|>)\s+\*(?=\*)", r"\1*", text) # Same for && (rvalue reference) text = re.sub(r"(\w|>)\s+&&(\w)", r"\1&& \2", text) text = re.sub(r"(\w|>)\s+&&(?=\s*[,)>(]|$)", r"\1&&", text) # Same for & (lvalue reference) - but don't match && text = re.sub(r"(\w|>)\s+&(?!&)(\w)", r"\1& \2", text) text = re.sub(r"(\w|>)\s+&(?!&)(?=\s*[,)>(]|$)", r"\1&", text) return text class InitializerType(Enum): NONE = (0,) ASSIGNMENT = (1,) BRACE = 2 def resolve_linked_text_name( type_def: compound.linkedTextType, strip_initializers: bool = False, ) -> (str, InitializerType): """ Resolve the full text content of a linkedTextType, including all text fragments and ref elements. """ name = "" in_string = False if hasattr(type_def, "content_") and type_def.content_: for part in type_def.content_: if part.category == 1: # MixedContainer.CategoryText in_string = part.value.count('"') % 2 != in_string name += part.value elif part.category == 3: # MixedContainer.CategoryComplex (ref element) # For ref elements, get the text content and fully qualify using refid text = "" if hasattr(part.value, "get_valueOf_"): text = part.value.get_valueOf_() elif hasattr(part.value, "valueOf_"): text = part.value.valueOf_ else: text = str(part.value) # Don't resolve refs inside string literals - doxygen may # incorrectly treat symbols in strings as references refid = getattr(part.value, "refid", None) if refid and not in_string: text = _qualify_text_with_refid(text, refid) name += text elif type_def.ref: name = type_def.ref[0].get_valueOf_() else: name = type_def.get_valueOf_() initialier_type = InitializerType.NONE if strip_initializers: if name.startswith("="): # Detect assignment initializers: = value initialier_type = InitializerType.ASSIGNMENT name = name[1:] elif name.startswith("{") and name.endswith("}"): # Detect brace initializers: {value} initialier_type = InitializerType.BRACE name = name[1:-1].strip() return ( normalize_nullability( normalize_pointer_spacing(normalize_angle_brackets(name.strip())) ), initialier_type, ) def _qualify_text_with_refid(text: str, refid: str) -> str: """Qualify a text symbol using the namespace extracted from its doxygen refid. For ref elements, doxygen provides a refid that encodes the fully qualified path to the referenced symbol. This function extracts the namespace from that refid and prepends it to the text, avoiding redundant qualification. Args: text: The symbol text (e.g., "SyncCallback") refid: The doxygen refid (e.g., "classfacebook_1_1react_1_1SyncCallback...") Returns: The qualified text (e.g., "facebook::react::SyncCallback") """ ns = extract_namespace_from_refid(refid) # Skip re-qualification if text is already globally qualified # (starts with "::") - it's already an absolute path if not ns or text.startswith(ns) or text.startswith("::"): return text # The text may already start with a trailing portion of the namespace. # For example ns="facebook::react::HighResDuration" and # text="HighResDuration::zero". We need to find the longest suffix of ns # that is a prefix of text (on a "::" boundary) and only prepend the # missing part. ns_parts = ns.split("::") prepend = ns for i in range(1, len(ns_parts)): suffix = "::".join(ns_parts[i:]) # Also compare without template args - for template specializations # like "SyncCallback< R(Args...)>", text "SyncCallback" should match base_suffix = _strip_template_args(ns_parts[i]) if ( text.startswith(suffix + "::") or text == suffix or text.startswith(base_suffix + "::") or text == base_suffix ): prepend = "::".join(ns_parts[:i]) break return prepend + "::" + text def _strip_template_args(name: str) -> str: """Strip template arguments from a type name. e.g. 'SyncCallback< R(Args...)>' -> 'SyncCallback' """ angle_idx = name.find("<") return name[:angle_idx].rstrip() if angle_idx != -1 else name