- pip
pip install deepdoc-lib --extra-index-url https://huangpustar.github.io/deepdoc-lib/simple
- pyproject
Add the following to pyproject.toml
For uv:
[[tool.uv.index]]
name = "deepdoc"
url = "https://huangpustar.github.io/deepdoc-lib/simple"

[project]
dependencies = [
    "deepdoc-lib",
]
from deepdoc import PdfParser, DocxParser, ExcelParser
# 解析 PDF
pdf_parser = PdfParser()
result = pdf_parser("document.pdf")
# 解析 Word
docx_parser = DocxParser()
result = docx_parser("document.docx")
# 解析 Excel
excel_parser = ExcelParser()
with open("data.xlsx", "rb") as f:
    result = excel_parser(f.read())
from deepdoc import create_vision_model
- Use Environment Variable
# 视觉模型配置
export DEEPDOC_VISION_PROVIDER="qwen"
export DEEPDOC_VISION_API_KEY="your-api-key"
export DEEPDOC_VISION_MODEL="qwen-vl-max"
export DEEPDOC_VISION_LANG="Chinese"
export DEEPDOC_VISION_BASE_URL="http://your_base_url"
# 其他配置
export DEEPDOC_LIGHTEN=0 # 是否使用轻量模式
vision_model = create_vision_model()
- Use Default Provider
export DEEPDOC_VISION_API_KEY="your-api-key"
vision_model = create_vision_model("qwen")
Supported providers: ["openai", "qwen", "zhipu", "ollama", "gemini", "anthropic"]
- Use Configuration File
Create deepdoc_config.yaml:
vision_model:
  provider: "qwen"
  model_name: "qwen-vl-max"
  api_key: "your-api-key"
  lang: "Chinese"
  base_url: "http://your-base-url"
vision_model = create_vision_model("/path/to/deepdoc_config.yaml")
with open("image.jpg", "rb") as f:
    result = vision_model.describe_with_prompt(f.read())