list_documents

This method returns a ListDocsResponse object. Access documents via response.documents.

Sync
Async

def list_documents(
    skip: int = 0,
    limit: int = 100,
    filters: Optional[Dict[str, Any]] = None,
    folder_name: Optional[Union[str, List[str]]] = None,
    folder_depth: Optional[int] = None,
    include_total_count: bool = False,
    include_status_counts: bool = False,
    include_folder_counts: bool = False,
    completed_only: bool = False,
    sort_by: Optional[str] = "updated_at",
    sort_direction: str = "desc"
) -> ListDocsResponse

async def list_documents(
    skip: int = 0,
    limit: int = 100,
    filters: Optional[Dict[str, Any]] = None,
    folder_name: Optional[Union[str, List[str]]] = None,
    folder_depth: Optional[int] = None,
    include_total_count: bool = False,
    include_status_counts: bool = False,
    include_folder_counts: bool = False,
    completed_only: bool = False,
    sort_by: Optional[str] = "updated_at",
    sort_direction: str = "desc"
) -> ListDocsResponse

Parameters

skip (int, optional): Number of documents to skip for pagination. Defaults to 0.
limit (int, optional): Maximum number of documents to return. Defaults to 100.
filters (Dict[str, Any], optional): Metadata filters to apply (see Metadata Filters below). Defaults to None.
folder_name (str | List[str], optional): Folder scope for the listing. Accepts a single canonical path (e.g., /projects/alpha) or a list of paths/names. Defaults to None.
folder_depth (int, optional): Folder scope depth. None/0 = exact match, -1 = include all descendants, n > 0 = include descendants up to n levels deep. Defaults to None.
include_total_count (bool, optional): Include total count of matching documents. Defaults to False.
include_status_counts (bool, optional): Include counts grouped by processing status. Defaults to False.
include_folder_counts (bool, optional): Include counts grouped by folder. Defaults to False.
completed_only (bool, optional): Only return documents with completed status. Defaults to False.
sort_by (str, optional): Field to sort by (created_at, updated_at, filename, external_id). Defaults to "updated_at".
sort_direction (str, optional): Sort direction (asc or desc). Defaults to "desc".

Metadata Filters

Pass any JSON filter described in the Metadata Filtering guide via the filters argument. Example:

filters = {
    "$and": [
        {"department": {"$eq": "research"}},
        {"priority": {"$gte": 40}},
        {"start_date": {"$lte": "2024-06-01"}}
    ]
}

response = db.list_documents(filters=filters, include_total_count=True)

Returns

ListDocsResponse object with the following properties:

documents (List[Document]): The list of documents
skip (int): Pagination offset used
limit (int): Limit used
returned_count (int): Number of documents in this response
total_count (Optional[int]): Total matching documents (if include_total_count=True)
has_more (bool): Whether more documents exist beyond this page
next_skip (Optional[int]): Skip value to use for the next page
status_counts (Optional[Dict[str, int]]): Document counts by status (if include_status_counts=True)
folder_counts (Optional[List]): Document counts by folder (if include_folder_counts=True)

Examples

Basic Usage

Sync
Async

from morphik import Morphik

db = Morphik()

# Basic listing
response = db.list_documents(limit=10)
print(f"Returned {response.returned_count} documents")

for doc in response.documents:
    print(f"Document: {doc.filename}")

from morphik import AsyncMorphik

async with AsyncMorphik() as db:
    response = await db.list_documents(limit=10)

    for doc in response.documents:
        print(f"Document: {doc.filename}")

Pagination with Total Count

Sync
Async

# Get total count and paginate
response = db.list_documents(limit=10, include_total_count=True)
print(f"Showing {response.returned_count} of {response.total_count} documents")

# Check if more pages exist
if response.has_more:
    next_page = db.list_documents(skip=response.next_skip, limit=10)

response = await db.list_documents(limit=10, include_total_count=True)

if response.has_more:
    next_page = await db.list_documents(skip=response.next_skip, limit=10)

Sorting and Filtering

Sync
Async

# Sort by creation date, newest first
response = db.list_documents(
    sort_by="created_at",
    sort_direction="desc",
    filters={"department": "research"}
)

# Only completed documents
response = db.list_documents(completed_only=True)

response = await db.list_documents(
    sort_by="created_at",
    sort_direction="desc",
    filters={"department": "research"}
)

Nested Folder Queries

# Include all documents under /projects/alpha and its children
response = db.list_documents(
    folder_name="/projects/alpha",
    folder_depth=-1,  # descend through nested folders
    include_folder_counts=True,
)

Aggregates and Counts

Sync
Async

# Get status breakdown
response = db.list_documents(include_status_counts=True)
print(response.status_counts)
# {"completed": 42, "processing": 3, "failed": 1}

# Get folder distribution
response = db.list_documents(include_folder_counts=True)
for folder in response.folder_counts:
    print(f"{folder.folder}: {folder.count} documents")

response = await db.list_documents(include_status_counts=True)
print(response.status_counts)

Document Properties

The Document objects returned by this method have the following properties:

external_id (str): Unique document identifier
content_type (str): Content type of the document
filename (Optional[str]): Original filename if available
metadata (Dict[str, Any]): User-defined metadata
storage_info (Dict[str, str]): Storage-related information
system_metadata (Dict[str, Any]): System-managed metadata
chunk_ids (List[str]): IDs of document chunks
folder_name (Optional[str]): Folder leaf name
folder_path (Optional[str]): Canonical folder path (includes nested parents)

Client

Document Ingestion

Document Retrieval

Data Organization

Document Updates

Batch Operations

Knowledge Graph Operations

Chat & Conversation Management

Document Management

Usage & Monitoring

Parameters

Metadata Filters

Returns

Examples

Basic Usage

Sorting and Filtering

Nested Folder Queries

Aggregates and Counts

Document Properties

Client

Document Ingestion

Document Retrieval

Data Organization

Document Updates

Batch Operations

Knowledge Graph Operations

Chat & Conversation Management

Document Management

Usage & Monitoring

​Parameters

​Metadata Filters

​Returns

​Examples

​Basic Usage

​Pagination with Total Count

​Sorting and Filtering

​Nested Folder Queries

​Aggregates and Counts

​Document Properties

Parameters

Metadata Filters

Returns

Examples

Basic Usage

Pagination with Total Count

Sorting and Filtering

Nested Folder Queries

Aggregates and Counts

Document Properties