from cognite.client import CogniteClient
from cognite.client.config import FusionNotebookConfig
# Client built from a FusionNotebookConfig that requests the "20230101-beta" API subversion.
client = CogniteClient(FusionNotebookConfig(api_subversion="20230101-beta"))

# Placeholder value: put in your file id before running the rest of the script.
file_id = ...
from typing import Any
from cognite.client.data_classes.contextualization import DiagramConvertResults
def ocr(client, file_id: int, start_page: int = 1, limit: int = 50) -> list[dict[str, Any]]:
    """Get OCR text from a file that has been through diagram/detect before.

    Args:
        client: Client whose ``diagrams._camel_post`` method is used to post to ``/ocr``.
        file_id (int): file id
        start_page (int): First page to get ocr from.
        limit (int): The maximum number of pages to get ocr from.

    Returns:
        list[dict[str, Any]]: List of OCR results per page.

    Raises:
        TypeError: If the ``items`` entry of the response body is not a list.
    """
    response = client.diagrams._camel_post(
        "/ocr",
        json={"file_id": file_id, "start_page": start_page, "limit": limit},
    )
    items = response.json()["items"]
    # Explicit check rather than ``assert`` so the validation is not stripped under ``python -O``.
    if not isinstance(items, list):
        raise TypeError(
            f"Expected 'items' in the OCR response to be a list, got {type(items).__name__}"
        )
    return items
def ocr_annotation_to_detect_annotation(ocr_annotation: dict[str, Any]) -> dict[str, Any]:
    """Convert one OCR annotation into the annotation shape used as diagram convert input.

    Args:
        ocr_annotation (dict[str, Any]): OCR annotation with a ``text`` entry and a
            ``boundingBox`` entry holding ``xMin``, ``xMax``, ``yMin`` and ``yMax``.

    Returns:
        dict[str, Any]: Annotation carrying the same text and a ``rectangle`` region on
            page 1 whose vertices are the four corners of the bounding box.
    """
    bounding_box = ocr_annotation["boundingBox"]
    # Corner order: (xMin, yMin), (xMin, yMax), (xMax, yMin), (xMax, yMax).
    vertices = [
        {"x": x, "y": y}
        for x in (bounding_box["xMin"], bounding_box["xMax"])
        for y in (bounding_box["yMin"], bounding_box["yMax"])
    ]
    return {
        "text": ocr_annotation["text"],
        "region": {"shape": "rectangle", "page": 1, "vertices": vertices},
    }
def create_ocr_svg(client, file_id: int):
    """Get OCR text for a single-page PDF and create an SVG that overlays it as rectangles on top of a raster image.

    Args:
        client: Client used to run the diagram detect job, fetch the OCR result and run
            the convert job.
        file_id (int): The file ID of the file used to create an OCR SVG.

    Returns:
        The ``svgUrl`` of the first result of the first item in the convert job result.

    Raises:
        ValueError: If the detect result reports a page count other than one.
    """
    # Function-local import: ``FileReference`` is needed below and this keeps the
    # function self-contained regardless of what the module imports.
    from cognite.client.data_classes.contextualization import FileReference

    max_annotations = 10000  # For now, a limit of the API

    # Verify one page, and also make sure OCR exists.
    detect_job = client.diagrams.detect(
        [{"name": "dummy"}],
        file_references=FileReference(file_id=file_id, first_page=1, last_page=1),
    )
    file_result = detect_job.result["items"][0]
    if file_result["pageCount"] != 1:
        raise ValueError("The file must have one page")

    ocr_result = ocr(client, file_id, 1, 1)[0]["annotations"]
    input_items = [
        {
            "fileId": file_id,
            # Truncate before converting so annotations past the limit are never built.
            "annotations": [
                ocr_annotation_to_detect_annotation(a) for a in ocr_result[:max_annotations]
            ],
        }
    ]
    job = client.diagrams._run_job(
        job_path="/convert",
        status_path="/convert/",
        items=input_items,
        job_cls=DiagramConvertResults,
    )
    res = job.result
    return res["items"][0]["results"][0]["svgUrl"]
# Create an SVG file with the OCR overlay
create_ocr_svg(client, file_id)