Hi Jeroen
Apache PDFBox seems to do a good job; I still have to run some benchmarks for speed.
/**
 * Extracts the text of a stored PDF with Apache PDFBox and shows it, split
 * into lines, through `alert`.
 *
 * Any failure while reading, parsing or extracting is reported through
 * `alert` instead of being thrown, so the function never propagates an
 * exception to the caller.
 *
 * @param {object} obj - Object handed to Core.getFileDocumentContent
 *   (presumably a System.FileDocument IMendixObject — confirm with caller).
 * @param {object} ctx - Context handed to Core.getFileDocumentContent.
 * @returns {undefined} Nothing; the result is only displayed.
 */
function readPDFDocument(obj, ctx) {
  // Closes a resource if it was opened; a failing close is reported rather
  // than silently dropped, and never masks the main result.
  function closeAndReport(resource) {
    if (resource === null) {
      return;
    }
    try {
      resource.close();
    } catch (closeError) {
      alert(closeError);
    }
  }

  // Declared up front so the finally block can release whatever was opened.
  var is = null;
  var rabfi = null;
  var pdDoc = null;
  try {
    is = com.mendix.core.Core.getFileDocumentContent(ctx, obj);
    rabfi = new org.apache.pdfbox.io.RandomAccessBufferedFileInputStream(is);
    var parser = new org.apache.pdfbox.pdfparser.PDFParser(rabfi);
    // Parse first, then ask the parser for the document, instead of wrapping
    // a not-yet-parsed COSDocument by hand.
    parser.parse();
    pdDoc = parser.getPDDocument();
    var pdfStripper = new org.apache.pdfbox.text.PDFTextStripper();
    var text = pdfStripper.getText(pdDoc);
    var docxLines = text.split('\n');
    alert(docxLines);
  } catch (e) {
    alert(e);
  } finally {
    // java.io.Closeable specifies close() on an already-closed resource as a
    // no-op, so closing outermost-to-innermost is safe even when an outer
    // object has already closed an inner one.
    closeAndReport(pdDoc);
    closeAndReport(rabfi);
    closeAndReport(is);
  }
}
// Driver: run an XPath query for System.FileDocument objects whose Name
// contains ".pdf", then print the first hit's text or report that nothing
// matched.
var ctx = root.getContext();
// NOTE(review): the two numeric arguments are passed as 0 then 1 — confirm
// they match the parameter order of Core.retrieveXPathQuery (amount/offset).
var arr_obj = com.mendix.core.Core.retrieveXPathQuery(ctx, "//System.FileDocument[contains(Name,'.pdf')]", 0, 1, {});
if (!(arr_obj.length > 0)) {
  alert("Not found");
} else {
  readPDFDocument(arr_obj[0], ctx);
}
...excuse my Graal