Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 53 additions & 6 deletions docstrange/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
"""
Document Data Extractor - Extract structured data from any document into LLM-ready formats.

For engineering drawing extraction use EngineeringDrawingPipeline directly — it is the
dedicated entry point for PDFs and images containing title blocks, dimensions, GD&T, BOM,
notes, and revision history.
"""

from .extractor import DocumentExtractor
Expand All @@ -8,13 +12,56 @@
from .exceptions import ConversionError, UnsupportedFormatError
from .config import InternalConfig

# Engineering drawing extraction surface
from .pipelines.engineering import EngineeringDrawingPipeline
from .schemas.engineering import (
BBoxSchema,
ExtractionElement,
EngineeringDrawingResult,
DimensionElement,
TitleBlockField,
NoteElement,
GDTElement,
BOMRow,
RevisionEntry,
)
from .extractors import (
BaseExtractor,
TitleBlockExtractor,
DimensionExtractor,
NoteExtractor,
GDTExtractor,
BOMExtractor,
RevisionExtractor,
)

__version__ = "1.1.5"
# Public names exported by ``from docstrange import *``.
# Each name is listed exactly once, grouped by the part of the API it
# belongs to, inside a single list literal.
__all__ = [
    # Generic document extraction
    "DocumentExtractor",
    "ConversionResult",
    "GPUConversionResult",
    "CloudConversionResult",
    "ConversionError",
    "UnsupportedFormatError",
    "InternalConfig",
    # Engineering drawing extraction
    "EngineeringDrawingPipeline",
    "EngineeringDrawingResult",
    "BBoxSchema",
    "ExtractionElement",
    "DimensionElement",
    "TitleBlockField",
    "NoteElement",
    "GDTElement",
    "BOMRow",
    "RevisionEntry",
    # Individual extractors (for custom pipelines)
    "BaseExtractor",
    "TitleBlockExtractor",
    "DimensionExtractor",
    "NoteExtractor",
    "GDTExtractor",
    "BOMExtractor",
    "RevisionExtractor",
]
Empty file added docstrange/api/__init__.py
Empty file.
11 changes: 11 additions & 0 deletions docstrange/api/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""FastAPI application entry point.

Run with:
    uvicorn docstrange.api.main:app --reload --port 8000

Then visit http://localhost:8000/docs for the interactive API explorer.
"""

from .routes import create_app

# Module-level application object returned by ``create_app()`` from ``.routes``.
# This is the ``app`` attribute named by the ``docstrange.api.main:app`` uvicorn
# target shown in the module docstring, so it is created at import time.
app = create_app()
86 changes: 86 additions & 0 deletions docstrange/api/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""Pydantic response models for the DocStrange Engineering API.

These are thin wrappers / re-exports that let FastAPI generate accurate
OpenAPI schemas for every endpoint without duplicating the core schemas.
"""

from typing import Any, Dict, List, Optional

from pydantic import BaseModel

from ..schemas.engineering import (
BBoxSchema,
BOMRow,
DimensionElement,
EngineeringDrawingResult,
ExtractionMetadata,
GDTElement,
NoteElement,
RevisionEntry,
TitleBlockField,
)

# Names exported by a star-import of this module: first the response models
# defined in this module, then schema types imported from
# ``..schemas.engineering`` and re-exported unchanged.
# NOTE(review): BBoxSchema, EngineeringDrawingResult and ExtractionMetadata are
# imported by this module but are not listed here — confirm that is intended.
__all__ = [
    "HealthResponse",
    "FullExtractionResponse",
    "OverlayBBox",
    "OverlayAnnotation",
    "OverlaySummary",
    "OverlayImageSize",
    "OverlayResponse",
    # Re-exported schema types used as list element response models
    "TitleBlockField",
    "DimensionElement",
    "NoteElement",
    "GDTElement",
    "BOMRow",
    "RevisionEntry",
]


# Response model made of three required string fields.
# NOTE(review): presumably the payload of a health-check endpoint — confirm
# against the route that returns it.
class HealthResponse(BaseModel):
    status: str
    service: str
    version: str


# Subclass of EngineeringDrawingResult that adds a single extra field; all other
# fields are inherited from the parent schema.
class FullExtractionResponse(EngineeringDrawingResult):
    """EngineeringDrawingResult extended with an optional overlay payload."""
    # Free-form dictionary; defaults to None, so it may be omitted entirely.
    overlay_json: Optional[Dict[str, Any]] = None


# Rectangle described by four required floats: x, y, width, height.
# OverlayAnnotation uses this type for both its ``bbox`` and ``bbox_normalized``
# fields.
# NOTE(review): units and origin are not visible here (presumably pixels for
# ``bbox`` and a 0-1 range for ``bbox_normalized``) — confirm.
class OverlayBBox(BaseModel):
    x: float
    y: float
    width: float
    height: float


# One annotation entry of an overlay; every field is required.
# OverlayResponse carries a list of these in its ``annotations`` field.
class OverlayAnnotation(BaseModel):
    change_id: str  # NOTE(review): identifier format is not visible here
    type: str  # presumably the key counted in OverlaySummary.by_type — confirm
    text: str
    page: int  # NOTE(review): 0- vs 1-based numbering is not visible here
    confidence: float  # NOTE(review): value range is not enforced by this model
    bbox: OverlayBBox
    bbox_normalized: OverlayBBox  # same rectangle type as ``bbox``
    color: str  # NOTE(review): color encoding is not visible here
    label: str


# Summary section of an overlay response: a string-to-int mapping plus an int.
# NOTE(review): presumably ``by_type`` maps annotation type to count and
# ``total`` is the overall count — confirm against the producer.
class OverlaySummary(BaseModel):
    by_type: Dict[str, int]
    total: int


# Image dimensions as two required integers.
# NOTE(review): presumably pixels — confirm against the producer.
class OverlayImageSize(BaseModel):
    width: int
    height: int


# Top-level overlay payload: image size, an optional page filter, a count, a
# summary object and the list of annotations.
class OverlayResponse(BaseModel):
    image_size: OverlayImageSize
    # The only optional field; defaults to None.
    # NOTE(review): presumably None means "no page filter applied" — confirm.
    page_filter: Optional[int] = None
    total_annotations: int
    summary: OverlaySummary
    annotations: List[OverlayAnnotation]
Loading