Coverage for mindsdb / interfaces / knowledge_base / utils.py: 100%

6 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-21 00:36 +0000

1"""Utilities for knowledge base operations.""" 

2import hashlib 

3 

4 

def generate_document_id(content: str, content_column: str = None, provided_id: str = None) -> str:
    """Return a deterministic document ID for a piece of content.

    A caller-supplied ID always wins and is returned unchanged. Otherwise the
    ID is derived purely from the content, so identical content always maps
    to the same ID.

    Args:
        content: The content string to fingerprint. Only read when
            ``provided_id`` is None.
        content_column: Name of the content column. Not used in ID
            generation; kept for backward compatibility.
        provided_id: Optional user-provided ID. Any value other than None
            (including an empty string) is returned as-is.

    Returns:
        ``provided_id`` when it is not None, otherwise the first 16
        hexadecimal characters of the MD5 digest of the UTF-8 encoded
        content.
    """
    if provided_id is not None:
        return provided_id

    # 16 hex characters keep 64 bits of the digest: short enough to be a
    # convenient ID while staying deterministic for identical content.
    hash_length = 16

    # MD5 is used here only as a content fingerprint, not for security.
    # Declaring that lets the call succeed on FIPS-restricted builds, where
    # a plain hashlib.md5() raises ValueError; the digest is unchanged.
    digest = hashlib.md5(content.encode("utf-8"), usedforsecurity=False).hexdigest()
    return digest[:hash_length]