Skip to content

Commit

Permalink
add tree func for reading files in dirs (chatchat-space#471)
Browse files Browse the repository at this point in the history
  • Loading branch information
DingJunyao authored May 27, 2023
1 parent 4295f60 commit f7e120f
Showing 1 changed file with 24 additions and 2 deletions.
26 changes: 24 additions & 2 deletions chains/local_doc_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,29 @@
from langchain.docstore.document import Document


def tree(filepath, ignore_dir_names=None, ignore_file_names=None):
    """Recursively list every file found under ``filepath``.

    Args:
        filepath: A file or directory path, given as ``str`` or any
            ``os.PathLike`` object (e.g. ``pathlib.Path``).
        ignore_dir_names: Container of directory base names to skip. A
            directory whose base name is in this container is not descended
            into; this also applies to ``filepath`` itself.
        ignore_file_names: Container of file base names to leave out.

    Returns:
        A ``(paths, names)`` tuple of two parallel lists: the path of every
        file found, and the base name of each of those paths. Entries of a
        directory are visited in sorted order so the result is deterministic.
        ``(None, None)`` is returned when ``filepath`` does not exist, and
        ``([], [])`` when ``filepath`` is not a text path or is ignored.
    """
    if ignore_dir_names is None:
        ignore_dir_names = []
    if ignore_file_names is None:
        ignore_file_names = []

    # Accept pathlib.Path and friends instead of silently returning nothing.
    if isinstance(filepath, os.PathLike):
        filepath = os.fspath(filepath)
    if not isinstance(filepath, str):
        return [], []

    if not os.path.exists(filepath):
        print("路径不存在")
        return None, None

    found = []
    if os.path.isfile(filepath):
        if os.path.basename(filepath) not in ignore_file_names:
            found.append(filepath)
    elif os.path.isdir(filepath) and os.path.basename(filepath) not in ignore_dir_names:
        _tree_collect(filepath, ignore_dir_names, ignore_file_names, found, frozenset())
    return found, [os.path.basename(p) for p in found]


def _tree_collect(dirpath, ignore_dir_names, ignore_file_names, found, ancestors):
    """Append the files below ``dirpath`` to ``found``, depth first."""
    # os.path.isdir follows symlinks, so a link pointing back at one of the
    # directories currently being walked would be re-entered again and again.
    # Only the active ancestor chain is tracked: a directory that is reachable
    # twice without forming a cycle is still listed twice.
    real_dir = os.path.realpath(dirpath)
    if real_dir in ancestors:
        return
    ancestors = ancestors | {real_dir}

    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for name in sorted(os.listdir(dirpath)):
        full_path = os.path.join(dirpath, name)
        if os.path.isfile(full_path):
            if name not in ignore_file_names:
                found.append(full_path)
        elif os.path.isdir(full_path) and name not in ignore_dir_names:
            _tree_collect(full_path, ignore_dir_names, ignore_file_names, found, ancestors)


def load_file(filepath, sentence_size=SENTENCE_SIZE):
if filepath.lower().endswith(".md"):
loader = UnstructuredFileLoader(filepath, mode="elements")
Expand Down Expand Up @@ -189,8 +212,7 @@ def init_knowledge_vector_store(self,
return None
elif os.path.isdir(filepath):
docs = []
for file in tqdm(os.listdir(filepath), desc="加载文件"):
fullfilepath = os.path.join(filepath, file)
for fullfilepath, file in tqdm(zip(*tree(filepath, ignore_dir_names=['tmp_files'])), desc="加载文件"):
try:
docs += load_file(fullfilepath, sentence_size)
loaded_files.append(fullfilepath)
Expand Down

0 comments on commit f7e120f

Please sign in to comment.