- AI Union | KI Automatisierung & Agenten
- Posts
- Mistral OCR mit n8n macht PDF Extraktion zum Kinderspiel!
Mistral OCR mit n8n macht PDF Extraktion zum Kinderspiel!

In diesem Video erkläre ich dir ausführlich, wie du mithilfe von n8n die OCR von Mistral einbindest, um Daten aus PDFs ohne Mühe zu extrahieren.
Ressourcen (n8n-Workflow als JSON zum Importieren):
{
"name": "Mistral OCR",
"nodes": [
{
"parameters": {
"formTitle": "Invoice",
"formDescription": "Please upload your invoice",
"formFields": {
"values": [
{
"fieldLabel": "Invoice",
"fieldType": "file",
"multipleFiles": false,
"requiredField": true
}
]
},
"options": {}
},
"type": "n8n-nodes-base.formTrigger",
"typeVersion": 2.2,
"position": [
40,
0
],
"id": "9392dda2-4f1e-40c6-bcfb-4a4c92f5553b",
"name": "On form submission",
"webhookId": "2024afb0-ab97-4820-a9e3-6af4067ebfd1"
},
{
"parameters": {
"method": "POST",
"url": "https://api.mistral.ai/v1/files",
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"sendBody": true,
"contentType": "multipart-form-data",
"bodyParameters": {
"parameters": [
{
"name": "purpose",
"value": "ocr"
},
{
"parameterType": "formBinaryData",
"name": "file",
"inputDataFieldName": "Invoice"
}
]
},
"options": {}
},
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.2,
"position": [
200,
0
],
"id": "592ed05c-0390-43f3-8e90-8003dc4fa380",
"name": "Upload to Mistral",
"credentials": {
"httpHeaderAuth": {
"id": "9KN3gNpI4uygBsRe",
"name": "Mistral OCR Demo"
}
}
},
{
"parameters": {
"url": "=https://api.mistral.ai/v1/files/{{ $json.id }}/url",
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"sendQuery": true,
"queryParameters": {
"parameters": [
{
"name": "expiry",
"value": "24"
}
]
},
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Accept",
"value": "application/json"
}
]
},
"options": {}
},
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.2,
"position": [
360,
0
],
"id": "c242e491-269b-421f-91f4-474a01b7bc71",
"name": "Get Signed URL",
"credentials": {
"httpHeaderAuth": {
"id": "9KN3gNpI4uygBsRe",
"name": "Mistral OCR Demo"
}
}
},
{
"parameters": {
"method": "POST",
"url": "https://api.mistral.ai/v1/ocr",
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"sendBody": true,
"specifyBody": "json",
"jsonBody": "={\n \"model\": \"mistral-ocr-latest\",\n \"document\": {\n \"type\": \"document_url\",\n \"document_url\": \"{{ $json.url }}\"\n },\n \"include_image_base64\": true\n}",
"options": {}
},
"type": "n8n-nodes-base.httpRequest",
"typeVersion": 4.2,
"position": [
540,
0
],
"id": "22fd8b30-dbb7-4e27-af1d-dacf8c2661b0",
"name": "Get OCR Results",
"credentials": {
"httpHeaderAuth": {
"id": "9KN3gNpI4uygBsRe",
"name": "Mistral OCR Demo"
}
}
},
{
"parameters": {
"text": "={{ $json.pages.map((page) => page.markdown).join('\\n\\n') }}",
"attributes": {
"attributes": [
{
"name": "date",
"type": "date",
"description": "the date of the invoice"
},
{
"name": "invoice number",
"description": "the number of the invoice"
}
]
},
"options": {}
},
"type": "@n8n/n8n-nodes-langchain.informationExtractor",
"typeVersion": 1,
"position": [
680,
0
],
"id": "23ccd75e-7aad-436c-828f-3c3918161632",
"name": "Information Extractor"
},
{
"parameters": {
"options": {}
},
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"typeVersion": 1,
"position": [
780,
220
],
"id": "1ffc1fd9-e74c-498e-a7e9-adc8869bf0d4",
"name": "OpenAI Chat Model",
"credentials": {
"openAiApi": {
"id": "Qy81ja4SvvQtfyyq",
"name": "OpenAi account"
}
}
}
],
"pinData": {
"Upload to Mistral": [
{
"json": {
"id": "a9cc6df0-b05c-4fd1-9246-3f571edb6451",
"object": "file",
"bytes": 381709,
"created_at": 1742988930,
"filename": "sample-invoice.pdf",
"purpose": "ocr",
"sample_type": "ocr_input",
"num_lines": null,
"source": "upload"
}
}
],
"Get Signed URL": [
{
"json": {
"url": "https://mistralaifilesapiprodswe.blob.core.windows.net/fine-tune/e7c88b2f-267d-44f1-b553-83a7e16e1801/a9cc6df0b05c4fd192463f571edb6451.pdf?se=2025-03-27T11%3A48%3A34Z&sp=r&sv=2025-01-05&sr=b&sig=KK%2B2zb1qJaZpFEecVQrDAr7j3cMZgtRoGS4YdP%2BhIEE%3D"
}
}
],
"Get OCR Results": [
{
"json": {
"pages": [
{
"index": 0,
"markdown": "# CPB SOFTWARE (GERMANY) GMBH \n\nIm Bruch 3, 63897 Miltenberg\nTelefon: +49937197860\n[email protected]\nwww.cpb-software.com\n\nCPB Software (Germany) GmbH - Im Bruch 3 - 63897 Miltenberg/Main\nMusterkunde AG\nMr. John Doe\nMusterstr. 23\n12345 Musterstadt\nName: Stefanie Müller\nPhone: +49 9371 9786-0\n\n## Invoice WMACCESS Internet\n\nVAT No. DE199378386\n\n| Invoice No | Customer No | Invoice Period | Date |\n| :--: | :--: | :--: | :--: |\n| 123100401 | 12345 | 01.02.2024 - 29.02.2024 | 1. März 2024 |\n\n\n| Service Description | Amount <br> -without VAT- | quantity | Total Amount |\n| :--: | :--: | :--: | :--: |\n| Basic Fee wmView | 130,00€ | 1 | 130,00 € |\n| Basis fee for additional user accounts | 10,00 € | 0 | 0,00 € |\n| Basic Fee wmPos | 50,00 € | 0 | 0,00 € |\n| Basic Fee wmGuide | 1.000,00 € | 0 | 0,00 € |\n| Change of user accounts | 10,00 € | 0 | 0,00 € |\n| Transaction Fee T1 | 0,58 € | 14 | 8,12 € |\n| Transaction Fee T2 | 0,70 € | 0 | 0,00 € |\n| Transaction Fee T3 | 1,50 € | 162 | 243,00 € |\n| Transaction Fee T4 | 0,50 € | 0 | 0,00 € |\n| Transaction Fee T5 | 0,80 € | 0 | 0,00 € |\n| Transaction Fee T6 | 1,80 € | 0 | 0,00 € |\n| Transaction Fee G1 | 0,30 € | 0 | 0,00 € |\n| Transaction Fee G2 | 0,30 € | 0 | 0,00 € |\n| Transaction Fee G3 | 0,40 € | 0 | 0,00 € |\n| Transaction Fee G4 | 0,40 € | 0 | 0,00 € |\n| Transaction Fee G5 | 0,30 € | 0 | 0,00 € |\n| Transaction Fee G6 | 0,30 € | 0 | 0,00 € |\n| | Total | | 381,12 € |\n| | VAT $19 \\%$ | | 72,41 € |\n| | Gross Amount Incl. VAT | | 453,53 € |\n\nTerms of Payment: Immediate payment without discount. Any bank charges must be paid by the invoice recipient.\nBank fees at our expense will be charged to the invoice recipient!\n\nPlease credit the amount invoiced to IBAN DE29 123456789012345678 | BIC GENODE51MIC (SEPA Credit Transfer)",
"images": [],
"dimensions": {
"dpi": 200,
"height": 2339,
"width": 1654
}
},
{
"index": 1,
"markdown": "# Invoice Details \n\nPeriod: $\\quad 01.02 .2024$\nto 29.02 .2024\n\n| Unit: | Musterkunde AG | 12345 |\n| :--: | :--: | :--: |\n\n\n| Request sections: | T1: | T2: | T3: | T4: | T5: | T6: | G1: | G2: | G3: | G4: | G5: | G6: |\n| :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: |\n| Amount in Euro: | 0.58 | 0.70 | 1.50 | 0.50 | 0.80 | 1.80 | 0.30 | 0.30 | 0.40 | 0.40 | 0.30 | 0.30 |\n\n\n| user-account-1 | 10 | 0 | 99 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 154,30 € |\n| :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: |\n| user-account-2 | 4 | 0 | 63 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 96,82 € |\n\n\n| Transaction Fee Seq | T1: | T2: | T3: | T4: | T5: | T6: | G1: | G2: | G3: | G4: | G5: | G6: | |\n| :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: |\n| Queries in Total: | 14 | 0 | 162 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |\n| Total in Euro: | 8.12 € | 0.00 € | 243.00 € | 0.00 € | 0.00 € | 0.00 € | 0.00 € | 0.00 € | 0.00 € | 0.00 € | 0.00 € | 0.00 € | 251,12 € |\n\nThe explanation of the query fee categories (T1 to T6 and G1 to G6) can be found on our website:\nhttps://www.wmaccess.com/abfragekategorien",
"images": [],
"dimensions": {
"dpi": 200,
"height": 2339,
"width": 1654
}
},
{
"index": 2,
"markdown": "# 2pb <br> software \n\n## Invoice Details for wmView Query Reference\n\nPeriod: $\\quad 01.02 .2024$\nto 29.02 .2024\n\n| Unit: | Musterkunde AG | 12345 |\n| :-- | :-- | :-- |\n\nQuery Reference:\n\n| \"Not specified\" | wmview. wmProfile and User Profiles Query Segments: | | | | | | |\n| :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: |\n| | T1: | T2: | T3: | T4: | T5: | T6: | |\n| | | | | | | | |\n| AZR/31/27439 | 4 | 0 | 3 | 0 | 0 | 0 | 15,82 |\n| CCL/3715 | 0 | 0 | 12 | 0 | 0 | 0 | 18,00 |\n| CRS/28432 | 0 | 0 | 4 | 0 | 0 | 0 | 6,00 |\n| Ce/52113 | 5 | 0 | 36 | 0 | 0 | 0 | 56,90 |\n| GS 32090 | 0 | 0 | 19 | 0 | 0 | 0 | 28,50 |\n| Kpi/22695 | 1 | 0 | 7 | 0 | 0 | 0 | 11,08 |\n| PG 7772 | 2 | 0 | 8 | 0 | 0 | 0 | 10,16 |\n| Rjn/11138 | 0 | 0 | 11 | 0 | 0 | 0 | 16,50 |\n| SF-M 596/99-08 | 0 | 0 | 15 | 0 | 0 | 0 | 22,50 |\n| Ttrb/17885 | 0 | 0 | 5 | 0 | 0 | 0 | 7,50 |\n| WPN:24791 | 1 | 0 | 23 | 0 | 0 | 0 | 35,08 |\n| Wwt/15658 | 1 | 0 | 4 | 0 | 0 | 0 | 6,58 |\n| | 0 | 0 | 11 | 0 | 0 | 0 | 16,50 |\n\n\n| Prize for each Query in Euro: | | | | | | |\n| :--: | :--: | :--: | :--: | :--: | :--: | :--: |\n| 0,56 | 0,70 | 1,50 | 0,50 | 0,80 | 1,80 | |",
"images": [],
"dimensions": {
"dpi": 200,
"height": 2339,
"width": 1654
}
}
],
"model": "mistral-ocr-2503-completion",
"usage_info": {
"pages_processed": 3,
"doc_size_bytes": 381709
}
}
}
]
},
"connections": {
"On form submission": {
"main": [
[
{
"node": "Upload to Mistral",
"type": "main",
"index": 0
}
]
]
},
"Upload to Mistral": {
"main": [
[
{
"node": "Get Signed URL",
"type": "main",
"index": 0
}
]
]
},
"Get Signed URL": {
"main": [
[
{
"node": "Get OCR Results",
"type": "main",
"index": 0
}
]
]
},
"Get OCR Results": {
"main": [
[
{
"node": "Information Extractor",
"type": "main",
"index": 0
}
]
]
},
"OpenAI Chat Model": {
"ai_languageModel": [
[
{
"node": "Information Extractor",
"type": "ai_languageModel",
"index": 0
}
]
]
}
},
"active": false,
"settings": {
"executionOrder": "v1"
},
"versionId": "9c6e8682-81d6-4512-be96-245ce5ecefd3",
"meta": {
"instanceId": "c4015987044746ece7ec1fa4a093d971fd09a368a13e934a29cf42d32d45a202"
},
"id": "ntTxjKlr12qhVulj",
"tags": []
}
Reply