feat: add postgres storage and remote sync
This commit is contained in:
50
core/parsers.py
Normal file
50
core/parsers.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import re
|
||||
from typing import Any, Dict, List
|
||||
|
||||
|
||||
# "@" followed by at least three letters, digits or underscores; the capture
# group is the handle without the leading "@".
# NOTE(review): there is no left boundary, so the domain part of an e-mail
# address ("user@example.com" -> "example") also matches — confirm whether
# callers rely on that.
USERNAME_PATTERN = re.compile(r"@([A-Za-z0-9_]{3,})")

# "t.me/<name>" with an optional http:// or https:// prefix; the capture group
# is the first path segment (at least three letters, digits or underscores).
TME_PATTERN = re.compile(r"(?:https?://)?t\.me/([A-Za-z0-9_]{3,})")

# A run of at least four digits wrapped in backticks; the capture group is the
# digits without the backticks.
ID_PATTERN = re.compile(r"`(\d{4,})`")


def extract_entities(text: str) -> List[Dict[str, Any]]:
    """Extract user ids, @usernames and t.me links from *text*.

    Entities are returned grouped by type (all user ids, then usernames, then
    t.me links) and, within each group, in order of first appearance in the
    text. Usernames and links are de-duplicated case-insensitively; the first
    spelling encountered is kept as ``display``.

    Args:
        text: Free-form text to scan. A falsy value (``""`` or ``None``)
            yields an empty list.

    Returns:
        A list of dicts. Every dict has ``"type"`` (``"user_id"``,
        ``"username"`` or ``"tme_link"``) and ``"value"``; usernames and links
        additionally carry ``"display"`` with the original casing, while
        ``"value"`` is lower-cased.
    """
    if not text:
        return []

    entities: List[Dict[str, Any]] = []
    # (type, normalized value) pairs already emitted.
    seen = set()

    # Matches are walked in text order rather than through ``set(...)``:
    # string hashing is randomized per process, so set iteration would make
    # the result order (and the chosen ``display`` spelling) vary between runs.
    for user_id in ID_PATTERN.findall(text):
        key = ("user_id", user_id)
        if key in seen:
            continue
        seen.add(key)
        entities.append({
            "type": "user_id",
            "value": user_id,
        })

    for username in USERNAME_PATTERN.findall(text):
        # The capture group is always 3+ word characters, so no stripping or
        # emptiness check is needed here.
        value = username.lower()
        key = ("username", value)
        if key in seen:
            continue
        seen.add(key)
        entities.append({
            "type": "username",
            "value": value,
            "display": username,
        })

    for slug in TME_PATTERN.findall(text):
        display = f"t.me/{slug}"
        value = display.lower()
        key = ("tme_link", value)
        if key in seen:
            continue
        seen.add(key)
        entities.append({
            "type": "tme_link",
            "value": value,
            "display": display,
        })

    return entities
|
||||
Reference in New Issue
Block a user