package jPDFProcessSamples;

import com.qoppa.ocr.OCRBridge;
import com.qoppa.ocr.TessJNI;
import com.qoppa.pdfProcess.PDFDocument;
import com.qoppa.pdfProcess.PDFPage;

/**
 * Sample: open a PDF that contains scanned pages, OCR every page with Tesseract,
 * insert the returned hOCR output back into each page, and save the result as a
 * new document.
 *
 * <p>Usage: {@code OCRPDFPages [inputPdf [outputPdf]]}. When an argument is
 * omitted, the corresponding default path below is used, so running the sample
 * with no arguments processes {@code C:/test/test.pdf} into
 * {@code C:/test/test_ocr.pdf}.
 *
 * <p>Before running, follow the instructions in the Qoppa knowledge base to copy
 * the Tesseract native libraries into {@link #TESSERACT_LIB_DIR} and the language
 * files into {@link #TESSDATA_DIR}.
 */
public class OCRPDFPages
{
	/** Input document used when no path is given on the command line. */
	private static final String DEFAULT_INPUT_PDF = "C:/test/test.pdf";

	/** Output document used when no path is given on the command line. */
	private static final String DEFAULT_OUTPUT_PDF = "C:/test/test_ocr.pdf";

	/** Folder the Tesseract libraries must be copied to (first argument of OCRBridge.initialize). */
	private static final String TESSERACT_LIB_DIR = "C:/test/tess";

	/** Folder holding the Tesseract language files (second argument of OCRBridge.initialize). */
	private static final String TESSDATA_DIR = "C:/test/tess/tessdata";

	/** Language identifier handed to the OCR engine. */
	private static final String OCR_LANGUAGE = "eng";

	/** Third argument of performOCR; presumably the rendering resolution in DPI - confirm against the API docs. */
	private static final int OCR_RESOLUTION = 300;

	/**
	 * Runs the OCR sample.
	 *
	 * @param args optional: {@code args[0]} is the PDF to OCR, {@code args[1]} is
	 *             the file to save the OCRed document to
	 */
	public static void main(String[] args)
	{
		// Fall back to the sample defaults when a path is not supplied
		String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PDF;
		String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT_PDF;

		try
		{
			// Load a PDF that contains scanned pages needing to be OCRed
			// (second constructor argument is left null, as in the original sample)
			PDFDocument pdfDoc = new PDFDocument(inputPath, null);

			// The Tesseract libraries must already be in TESSERACT_LIB_DIR and the
			// language files in TESSDATA_DIR (see the class comment)
			OCRBridge.initialize(TESSERACT_LIB_DIR, TESSDATA_DIR);
			TessJNI ocr = new TessJNI();

			// The loop does not add or remove pages, so read the page count once
			int pageCount = pdfDoc.getPageCount();
			for (int pageIndex = 0; pageIndex < pageCount; ++pageIndex)
			{
				PDFPage page = pdfDoc.getPage(pageIndex);

				// OCR the page, then insert the returned hOCR string into that same page
				String pageOCR = ocr.performOCR(OCR_LANGUAGE, page, OCR_RESOLUTION);
				page.insert_hOCR(pageOCR, false);
			}

			// Save the output document
			pdfDoc.saveDocument(outputPath);
		}
		catch (Throwable t)
		{
			// Throwable (not just Exception) is caught on purpose: a missing or
			// mismatched native library surfaces as an Error such as UnsatisfiedLinkError
			t.printStackTrace();
		}
	}
}