Path.rglob
Path.rglob(...)
Description
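Documentation for Path.rglob: it recursively yields every path beneath the directory that matches the given glob pattern, like calling Path.glob() with "**/" added in front of the pattern.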
Real-World Examples
Practical code examples showing how Path.rglob is used in real projects.
try:
# Get parent path from parentId
parent_path = ""
if not request.parentId.startswith('root_'):
# Find the parent item to get its path
for entry in Path(base_directory).rglob('*'):
if entry.is_dir():
entry_id = generate_id('dir', os.path.relpath(str(entry), base_directory))
if entry_id == request.parentId:
parent_path = os.path.relpath(str(entry), base_directory)
break
# Create the full path for the new file
full_path = os.path.join(base_directory, parent_path, request.name)
if os.path.exists(full_path):
raise HTTPException(status_code=400, detail="File already exists")
# Create parent directories if they don't exist
os.makedirs(os.path.dirname(full_path), exist_ok=True)
# Create empty file
with open(full_path, 'w', encoding='utf-8') as f:
f.write('')
if result.returncode != 0:
logger.warning(f" ✗ Clone failed: {result.stderr[:100]}")
continue
# Find and extract PDF files
pdf_files = list(clone_path.rglob("*.pdf"))
if pdf_files:
logger.info(f" ✓ Found {len(pdf_files)} PDF files")
# Copy PDFs to our dataset
for pdf_file in pdf_files:
# Skip large files (>5MB - probably not resumes)
if pdf_file.stat().st_size > 5 * 1024 * 1024:
continue
# Create unique filename
new_name = f"{repo_name}_{pdf_file.name}"
dest = self.output_dir / new_name
# Copy file
shutil.copy2(pdf_file, dest)
self.pdfs_extracted += 1
logger.info(f" → Extracted: {new_name} ({pdf_file.stat().st_size / 1024:.1f} KB)")
else: