Coverage for mindsdb / interfaces / knowledge_base / utils.py: 100%
6 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
1"""Utilities for knowledge base operations."""
2import hashlib
def generate_document_id(content: str, content_column: str = None, provided_id: str = None) -> str:
    """
    Return a stable identifier for a knowledge base document.

    A caller-supplied ID always takes precedence and is handed back
    unchanged. When none is supplied, the ID is derived from the content
    alone, so identical content always yields the same ID.

    Args:
        content: Text of the document; hashed only when no ID is supplied.
        content_column: Name of the content column. Ignored when building
            the ID; accepted only for backward compatibility.
        provided_id: Optional ID chosen by the caller.

    Returns:
        provided_id when it is not None (an empty string is still returned
        as-is); otherwise the first 16 hex characters of the MD5 digest of
        the encoded content.
    """
    if provided_id is None:
        # Truncate the 32-character hex digest to keep generated IDs short.
        content_digest = hashlib.md5(content.encode()).hexdigest()
        return content_digest[:16]

    return provided_id