Compare commits
No commits in common. "cebebd906ab95b8c49ca344e7d64110439e5565b" and "5fb2269f18443bcd298e946e2070aca1a0473e67" have entirely different histories.
cebebd906a
...
5fb2269f18
|
|
@ -16,5 +16,3 @@ google-generativeai
|
|||
sentence-transformers
|
||||
# Graph database for relationship mapping
|
||||
neo4j
|
||||
qdrant-client
|
||||
sentence-transformers
|
||||
|
|
@ -1,159 +0,0 @@
|
|||
|
||||
import os
|
||||
import re
|
||||
from typing import List, Dict
|
||||
import logging
|
||||
from dotenv import load_dotenv
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.http import models
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
# Logging: timestamped, INFO-level messages; `logger` is shared by every
# function in this module.
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)

# Load environment variables before the settings below are read.
load_dotenv()

# Settings
# The catalog path is resolved relative to this file, not the working directory.
AGENT_CATALOG_PATH = os.path.join(os.path.dirname(__file__), '../docs/AGENT_CATALOG.md')
QDRANT_HOST = os.environ.get("QDRANT_HOST", "localhost")
QDRANT_PORT = int(os.environ.get("QDRANT_PORT", 6333))
COLLECTION_NAME = "routing_index"
# VECTOR_SIZE has to agree with the embedding width of the model named here
# (384 for this model, per the original note).
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
VECTOR_SIZE = 384
|
||||
|
||||
def read_agent_catalog(file_path: str) -> List[Dict]:
    """Parse the Markdown agent catalog into a list of agent records.

    The catalog is read as one ``### <Agent Name>`` section per agent. Inside
    a section, the bullet lines ``- **Papel:** ...``,
    ``- **Especialidade:** ...`` and ``- **Crews:** ...`` are picked up; any
    other line is ignored.

    Args:
        file_path: Path to the catalog Markdown file (read as UTF-8).

    Returns:
        A list of dicts with the keys ``name``, ``description`` and ``crew``.
        ``description`` concatenates the role and specialty as
        ``"Papel: <role>. Especialidade: <specialty>. "``. Sections lacking a
        name, a description or a crew are skipped. An empty list is returned
        (and an error is logged) when ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        logger.error(f"Agent catalog not found at {file_path}")
        return []

    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    role_prefix = "- **Papel:**"
    specialty_prefix = "- **Especialidade:**"
    crews_prefix = "- **Crews:**"

    # Split only on level-3 headings at the start of a line. An unanchored
    # split on '### ' would also cut inside '#### ' sub-headings and inside
    # body text that happens to contain the marker. Everything before the
    # first heading is the document header and is dropped.
    agent_blocks = re.split(r'^### ', content, flags=re.MULTILINE)[1:]

    agents = []
    for block in agent_blocks:
        lines = block.strip().split('\n')
        name = lines[0].strip()

        description = ""
        crew = ""

        for raw_line in lines[1:]:
            line = raw_line.strip()
            # Slice off the whole bold prefix. Splitting on the first ':'
            # would leave the closing '**' of the label in the value.
            if line.startswith(role_prefix):
                role = line[len(role_prefix):].strip()
                description += f"Papel: {role}. "
            elif line.startswith(specialty_prefix):
                specialty = line[len(specialty_prefix):].strip()
                description += f"Especialidade: {specialty}. "
            elif line.startswith(crews_prefix):
                crew = line[len(crews_prefix):].strip()

        if name and description and crew:
            agents.append({
                "name": name,
                "description": description,
                "crew": crew,
            })
            logger.info(f"Found agent: {name} (Crew: {crew})")

    return agents
|
||||
|
||||
def get_embeddings(texts: List[str]) -> List[List[float]]:
    """Embed each text with the model named by ``EMBEDDING_MODEL_NAME``.

    Args:
        texts: Strings to encode.

    Returns:
        The encoder output converted with ``.tolist()``.
    """
    logger.info(f"Generating embeddings using model {EMBEDDING_MODEL_NAME}...")
    # A fresh model instance is built on every call.
    encoder = SentenceTransformer(EMBEDDING_MODEL_NAME)
    return encoder.encode(texts).tolist()
|
||||
|
||||
def init_qdrant_collection(client: QdrantClient, collection_name: str, vector_size: int):
    """Ensure the named collection exists, creating it when it is absent.

    Args:
        client: Qdrant client used to list and create collections.
        collection_name: Name of the collection to look for or create.
        vector_size: Vector dimension used when the collection is created.

    An existing collection is left untouched; its configuration is not
    compared against ``vector_size``.
    """
    known_names = {entry.name for entry in client.get_collections().collections}
    if collection_name in known_names:
        logger.info(f"Collection '{collection_name}' already exists.")
        return

    logger.info(f"Creating collection '{collection_name}' with vector size {vector_size}...")
    # New collections use cosine distance.
    vector_params = models.VectorParams(
        size=vector_size,
        distance=models.Distance.COSINE,
    )
    client.create_collection(
        collection_name=collection_name,
        vectors_config=vector_params,
    )
|
||||
|
||||
def populate_collection(client: QdrantClient, collection_name: str, agents: List[Dict]):
    """Embed the agent descriptions and upsert one point per agent.

    Args:
        client: Qdrant client used for the upsert.
        collection_name: Target collection.
        agents: Records with the keys ``name``, ``description`` and ``crew``.

    Logs a warning and returns without touching the collection when
    ``agents`` is empty.
    """
    if not agents:
        logger.warning("No agents to index.")
        return

    vectors = get_embeddings([entry["description"] for entry in agents])

    # Point ids are the 1-based positions in `agents`. The raw "crew" string
    # is stored unchanged as `target_crew`, even when it names several crews.
    points = [
        models.PointStruct(
            id=position,
            vector=vectors[position - 1],
            payload={
                "agent_name": entry["name"],
                "target_crew": entry["crew"],
                "description": entry["description"],
            },
        )
        for position, entry in enumerate(agents, start=1)
    ]

    logger.info(f"Upserting {len(points)} points into '{collection_name}'...")
    client.upsert(collection_name=collection_name, points=points)
    logger.info("Indexing complete.")
|
||||
|
||||
def main():
    """Run the indexing pipeline end to end.

    Steps: read the agent catalog, connect to Qdrant, ensure the collection
    exists, then upsert the agent embeddings. A missing or empty catalog and
    a failed connection are logged and end the run early; the function
    returns ``None`` in every case.
    """
    logger.info("Starting Athena DB Initialization...")

    # Step 1: catalog -> agent records.
    catalog_agents = read_agent_catalog(AGENT_CATALOG_PATH)
    if not catalog_agents:
        logger.error("Failed to extract agents from catalog.")
        return

    # Step 2: open the client. Listing collections is used as a connectivity
    # probe, so an unreachable server is reported here rather than later.
    try:
        qdrant = QdrantClient(host=QDRANT_HOST, port=QDRANT_PORT)
        qdrant.get_collections()
        logger.info(f"Connected to Qdrant at {QDRANT_HOST}:{QDRANT_PORT}")
    except Exception as exc:
        logger.error(f"Failed to connect to Qdrant: {exc}")
        return

    # Step 3: make sure the collection is there.
    init_qdrant_collection(qdrant, COLLECTION_NAME, VECTOR_SIZE)

    # Step 4: embed and upsert.
    populate_collection(qdrant, COLLECTION_NAME, catalog_agents)

    logger.info("Athena DB Initialization finished successfully.")
|
||||
|
||||
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Loading…
Reference in New Issue