Troubles with pdf.js promises -
I'm trying to implement a PDF word count in JavaScript. I came across pdf.js, which uses promises. Is there a way to wait until the script is done before returning the count? I know this goes against the idea of promises, but the other JS PDF readers out there either produce a bunch of gibberish or return nothing. In its current form, the function always returns a word count of 0.
// The question's original attempt, kept as asked to illustrate the problem:
// countwords finishes before any pdf.js promise has resolved, so the caller
// never receives the real word count.
// NOTE(review): assumes `PDFJS` is the global exposed by the pdf.js build
// loaded on the page — confirm against the version in use.
function countwords(pdfurl) {
  var pdf = PDFJS.getDocument(pdfurl);
  var count = 0;
  pdf.then(function (pdf) {
    var maxPages = pdf.numPages;
    for (var j = 1; j <= maxPages; j++) {
      var page = pdf.getPage(j);
      var txt = "";
      page.then(function (page) {
        var textContent = page.getTextContent();
        textContent.then(function (page) {
          for (var i = 0; i < page.items.length; i++) {
            var txtAdd = page.items[i].str;
            txt += txtAdd.replace(/[^a-zA-Z0-9:;,.?!-() ]/g, '');
          }
          count = count + txt.split(" ").length;
        });
      });
    }
    // Executes before the page callbacks above have run, and the value only
    // goes to the `then` chain, not to whoever called countwords.
    return count;
  });
}
Promises cannot be handled in a synchronous manner. countwords cannot return the value immediately; it has to wait for the inner promises (one for the document, and several for the pages and their text contents) to be resolved. This means countwords must return a promise or accept a callback. The best way is to return and chain the then() calls. When you need to join several resolutions, use Promise.all:
/**
 * Counts the words in a PDF document using pdf.js.
 *
 * The work is asynchronous, so the result is delivered through a promise
 * rather than a return value.
 *
 * @param {string} pdfurl - URL of the PDF document to load.
 * @returns {Promise<number>} Resolves with the total word count over all
 *   pages; rejects if loading the document or reading any page fails.
 */
function countwords(pdfurl) {
  // NOTE(review): assumes `PDFJS` is the global exposed by the pdf.js build
  // loaded on the page — confirm against the version in use.
  var pdf = PDFJS.getDocument(pdfurl);
  return pdf.then(function (pdf) { // calculate the total count for the document
    var maxPages = pdf.numPages;
    var countPromises = []; // collects one promise per page
    for (var j = 1; j <= maxPages; j++) {
      var page = pdf.getPage(j);
      countPromises.push(page.then(function (page) { // add the page promise
        var textContent = page.getTextContent();
        return textContent.then(function (content) { // return the content promise
          // Declared inside the callback so each page starts from an empty
          // string; a `var` in the loop body would be shared by every page.
          var txt = "";
          for (var i = 0; i < content.items.length; i++) {
            var txtAdd = content.items[i].str;
            // The hyphen is placed last so it is a literal, not a range.
            txt += txtAdd.replace(/[^a-zA-Z0-9:;,.?!() -]/g, '');
          }
          // Count runs of non-whitespace so an empty page yields 0, not 1.
          var words = txt.match(/\S+/g);
          return words ? words.length : 0; // word count for this page
        });
      }));
    }
    // Wait for all pages and sum their counts.
    return Promise.all(countPromises).then(function (counts) {
      var count = 0;
      counts.forEach(function (c) { count += c; });
      return count;
    });
  });
}

// Wait for countwords to finish, either with the count or with an error.
countwords("https://cdn.mozilla.net/pdfjs/tracemonkey.pdf").then(function (count) {
  alert(count);
}, function (reason) {
  console.error(reason);
});
<script src="https://npmcdn.com/pdfjs-dist/build/pdf.js"></script>
Comments
Post a Comment