package jPDFProcessSamples;

import java.io.IOException;
import java.util.List;

import com.qoppa.pdf.PDFException;
import com.qoppa.pdf.TextPosition;
import com.qoppa.pdf.annotations.Redaction;
import com.qoppa.pdfProcess.PDFDocument;
import com.qoppa.pdfProcess.PDFPage;

/**
 * Sample program that permanently redacts social security numbers from a PDF.
 *
 * <p>Each page of the input document is searched with a regular expression
 * matching text of the form "123-12-1234". Every match is covered with a
 * redaction annotation, and the annotations are then applied ("burnt in") so
 * that the matched text is removed from the page content, leaving only a
 * rectangle where the SSN used to be. This process is called permanent
 * redaction.
 */
public class TextRedactionUsingRegex
{
    /** Path of the PDF document to read. */
    private static final String INPUT_FILE = "input.pdf";

    /** Path the redacted PDF document is written to. */
    private static final String OUTPUT_FILE = "output.pdf";

    /**
     * Regular expression describing a valid SSN: three digits, two digits and
     * four digits separated by dashes. The negative lookaheads reject an area
     * number of 000, 666 or 900-999, a group number of 00, and a serial
     * number of 0000.
     *
     * NOTE(review): the pattern is anchored with ^ and $. Whether the search
     * evaluates these per page, per line or per text run is not visible from
     * this file - confirm that SSNs embedded in longer text are still found;
     * if they are not, the anchors probably need to become word boundaries.
     */
    private static final String SSN_REGEX =
            "^(?!666|000|9\\d{2})\\d{3}-(?!00)\\d{2}-(?!0{4})\\d{4}$";

    /**
     * Opens {@code input.pdf}, redacts every SSN match on every page and
     * saves the result as {@code output.pdf}.
     *
     * <p>A {@link PDFException} or {@link IOException} raised while
     * processing is reported by printing its stack trace; in that case no
     * output file is saved.
     *
     * @param args command line arguments (not used)
     */
    public static void main(String[] args)
    {
        try
        {
            // Open the document. The second argument is null here
            // (presumably "no password handler" - confirm against the API).
            PDFDocument pdfDoc = new PDFDocument(INPUT_FILE, null);

            // Per page: search text, create redaction annotations, then apply.
            for (int i = 0; i < pdfDoc.getPageCount(); i++)
            {
                PDFPage pdfPage = pdfDoc.getPage(i);

                // Search for the text. The element type must be declared:
                // iterating a raw List as TextPosition does not compile.
                List<TextPosition> searchResults = pdfPage.findTextUsingRegex(SSN_REGEX);

                // Create one redaction annotation per match, covering the
                // quadrilaterals occupied by the matched text.
                for (TextPosition textPos : searchResults)
                {
                    Redaction redact = pdfDoc.getAnnotationFactory()
                            .createRedaction("Redaction sample", textPos.getPDFQuadrilaterals());
                    pdfPage.addAnnotation(redact);
                }

                // Apply ("burn in") all redaction annotations on the page.
                pdfPage.applyRedactionAnnotations();
            }

            pdfDoc.saveDocument(OUTPUT_FILE);
        }
        catch (PDFException ex)
        {
            ex.printStackTrace();
        }
        catch (IOException ex)
        {
            ex.printStackTrace();
        }
    }
}