import re
from typing import Any, Dict, List

# "@name" mentions: at least three characters from [A-Za-z0-9_].
USERNAME_PATTERN = re.compile(r"@([A-Za-z0-9_]{3,})")
# "t.me/name" links, with an optional http:// or https:// scheme.
TME_PATTERN = re.compile(r"(?:https?://)?t\.me/([A-Za-z0-9_]{3,})")
# Numeric IDs of four or more digits wrapped in backticks, e.g. `12345`.
ID_PATTERN = re.compile(r"`(\d{4,})`")


def extract_entities(text: str) -> List[Dict[str, Any]]:
    """Extract backticked numeric IDs, @usernames and t.me links from *text*.

    Entities are grouped by type in the order user IDs, usernames, t.me
    links. Within each group they appear in order of first occurrence in
    *text*, so the result is deterministic across runs.

    Usernames and links are de-duplicated case-insensitively; the first
    spelling encountered is kept as ``display`` while ``value`` holds the
    lower-cased form.

    Args:
        text: The text to scan. Any falsy value (``""``, ``None``) yields
            an empty list.

    Returns:
        A list of dicts. User IDs look like
        ``{"type": "user_id", "value": "<digits>"}``; usernames and links
        look like ``{"type": ..., "value": <lower-cased>, "display":
        <as written>}`` with ``type`` being ``"username"`` or
        ``"tme_link"``. Link values are always prefixed with ``t.me/``
        regardless of whether the source text carried a URL scheme.
    """
    if not text:
        return []

    entities: List[Dict[str, Any]] = []
    seen = set()

    # Iterate findall() results directly rather than through set(): the
    # `seen` set already removes duplicates, and keeping the list order
    # avoids the run-to-run ordering differences of hashed string sets.
    for user_id in ID_PATTERN.findall(text):
        key = ("user_id", user_id)
        if key in seen:
            continue
        seen.add(key)
        entities.append({"type": "user_id", "value": user_id})

    # The pattern only captures [A-Za-z0-9_]{3,}, so a match is never empty
    # and never carries surrounding whitespace; no stripping is needed.
    for username in USERNAME_PATTERN.findall(text):
        lowered = username.lower()
        key = ("username", lowered)
        if key in seen:
            continue
        seen.add(key)
        entities.append({
            "type": "username",
            "value": lowered,
            "display": username,
        })

    for link in TME_PATTERN.findall(text):
        normalized = f"t.me/{link}"
        lowered = normalized.lower()
        key = ("tme_link", lowered)
        if key in seen:
            continue
        seen.add(key)
        entities.append({
            "type": "tme_link",
            "value": lowered,
            "display": normalized,
        })

    return entities