|
5 | 5 | import pandas as pd |
6 | 6 | from scipy.spatial.distance import cosine |
7 | 7 |
|
| 8 | +from rdagent.core.knowledge_base import KnowledgeBase |
8 | 9 | from rdagent.log import rdagent_logger as logger |
9 | 10 | from rdagent.oai.llm_utils import APIBackend |
10 | 11 |
|
@@ -68,14 +69,11 @@ def contents_to_documents(contents: List[str], label: str = None) -> List[Docume |
68 | 69 | return docs |
69 | 70 |
|
70 | 71 |
|
71 | | -class VectorBase: |
| 72 | +class VectorBase(KnowledgeBase): |
72 | 73 | """ |
73 | 74 | This class is used for handling vector storage and query |
74 | 75 | """ |
75 | 76 |
|
76 | | - def __init__(self, vector_df_path: Union[str, Path] = None, **kwargs): |
77 | | - pass |
78 | | - |
79 | 77 | def add(self, document: Union[Document, List[Document]]): |
80 | 78 | """ |
81 | 79 | add new node to vector_df |
@@ -104,28 +102,15 @@ def search(self, content: str, topk_k: int = 5, similarity_threshold: float = 0) |
104 | 102 | """ |
105 | 103 | pass |
106 | 104 |
|
107 | | - def load(self, **kwargs): |
108 | | - """load vector_df""" |
109 | | - |
110 | | - def save(self, **kwargs): |
111 | | - """save vector_df""" |
112 | | - |
113 | 105 |
|
114 | 106 | class PDVectorBase(VectorBase): |
115 | 107 | """ |
116 | 108 | Implementation of VectorBase using Pandas |
117 | 109 | """ |
118 | 110 |
|
119 | | - def __init__(self, vector_df_path: Union[str, Path] = None): |
120 | | - super().__init__(vector_df_path) |
121 | | - |
122 | | - if vector_df_path: |
123 | | - try: |
124 | | - self.vector_df = self.load(vector_df_path) |
125 | | - except FileNotFoundError: |
126 | | - self.vector_df = pd.DataFrame(columns=["id", "label", "content", "embedding"]) |
127 | | - else: |
128 | | - self.vector_df = pd.DataFrame(columns=["id", "label", "content", "embedding"]) |
| 111 | + def __init__(self, path: Union[str, Path] = None): |
| 112 | + self.vector_df = pd.DataFrame(columns=["id", "label", "content", "embedding"]) |
| 113 | + super().__init__(path) |
129 | 114 |
|
130 | 115 | def shape(self): |
131 | 116 | return self.vector_df.shape |
@@ -196,10 +181,3 @@ def search(self, content: str, topk_k: int = 5, similarity_threshold: float = 0) |
196 | 181 | for _, similar_docs in most_similar_docs.iterrows(): |
197 | 182 | docs.append(Document().from_dict(similar_docs.to_dict())) |
198 | 183 | return docs, searched_similarities.to_list() |
199 | | - |
200 | | - def load(self, vector_df_path, **kwargs): |
201 | | - vector_df = pd.read_pickle(vector_df_path) |
202 | | - return vector_df |
203 | | - |
204 | | - def save(self, vector_df_path, **kwargs): |
205 | | - self.vector_df.to_pickle(vector_df_path) |
0 commit comments