7 Replies Latest reply on Dec 16, 2010 4:44 AM by try67

    this.getPageNthWord and background image problem

    ingrimm001

      Hi Folks,

       

      I'm having issues with this.getPageNthWord. I use this script to get a certain string from a document via a regular expression. After this string is retrieved, it is used to create a copy of the same document via extractPages. Sometimes there is a fairly large background image inside my PDFs. When that is the case, the method is unable to get any text information out of the document at all. The whole process looks like this:

       

      1. Use Access and Indesign to create a PDF with certain data and images from a blueprint.

      2. The result is a PDF file approx. 1200 pages long, containing around 200 datasets.

      3. Split these Pages and use a string in the current set as filename (this is where the error occurs, returns null).

      4. Check if this file already exists and add a "_n++" at the end if true

      5. save these pages to the given path

       

      I have uploaded a few screenshots to my Flickr account to show an example of a file that works quite well and another one that doesn't work at all.

       

      http://www.flickr.com/photos/57181796@N08/

       

      I'm sorry, but I cannot share the original files with you since they contain client data.

       

      this is the full script:

       

      /**
      * Folder into which the extracted PDF files are saved. It is used as the
      * prefix of every output path built further down in this script.
      *
      * Special characters like spaces should be escaped with \
      * If you want to modify the folder, use the following form:
      * "/Driveletter/Foldername/../LastFolderName/"
      * Make sure not to forget the / before and after the location
      */

       


      var filepath = "/n/05_2Projekte/0000000715_2010_GAG_Dettingen_KIRU/07_web/pdf/";

       

      /**
      * Number of pages that make up one expose, i.e. how many pages are
      * written to each extracted file - feel free to change
      */

       

      var pageType = 8;

       

      /**
      * Regular expression used to find the ID number in the page text.
      * It matches "08" followed by four digits, then dash-separated groups of
      * three, five, three and two digits. The g flag makes String.match
      * return every occurrence instead of only the first one.
      */

       

      var idNumber = /08\d\d\d\d\-\d\d\d\-\d\d\d\d\d-\d\d\d-\d\d/g;

       

      /**
      * Searches the leading words of the first page (page 0) of the document
      * for a pattern.
      *
      * The document is taken from `this`, so the function must run with the
      * document as its receiver (as is the case when it is called as a plain
      * function from a document-level script).
      *
      * Words are read with getPageNthWord(page, index, false), i.e. without
      * stripping, and concatenated before matching. Never more words are
      * requested than getPageNumWords reports for the page, so short pages
      * are not read past their end.
      *
      * @param reMatch regular expression that is searched for in the page text
      * @param maxWords optional maximum number of words to read from the page;
      *                 defaults to 30 when omitted or not a non-negative number
      * @return the result of String.match for reMatch (an array of matches
      *         when the expression carries the g flag), or null if nothing
      *         matched or an error occurred while reading the page
      */
      function ExtractFromDocument(reMatch, maxWords) {
          var wordLimit = (typeof maxWords === "number" && maxWords >= 0) ? maxWords : 30;
          try {
              // Clamp to the real word count of the page; a page that yields no
              // text (numWords of 0) simply produces an empty string and no match.
              var numWords = this.getPageNumWords(0);
              var wordsToRead = Math.min(numWords, wordLimit);
              var pageText = "";
              for (var j = 0; j < wordsToRead; j++) {
                  pageText += this.getPageNthWord(0, j, false);
              }
              return pageText.match(reMatch);
          } catch (e) {
              app.alert("Processing error: " + e);
              // Report the failure to the caller the same way as "not found".
              return null;
          }
      }

       

      /**
      * Finds the first counter value for which no output file exists yet.
      *
      * Existence is probed by trying to open the candidate file with
      * app.openDoc and closing it again; any error during that probe is
      * treated as "file does not exist". Counter 0 stands for the plain name
      * "<filename>-000.pdf", every other counter for "<filename>-000_<n>.pdf",
      * both located below the global filepath.
      *
      * @param filename base name of the file which should be checked
      * @param n counter to start probing from
      * @return the first counter, starting at n, whose file could not be opened
      */
      function checkIfFileExists(filename, n) {
          for (;;) {
              var suffix = (n == 0) ? "-000.pdf" : "-000_" + n + ".pdf";
              var found;
              try {
                  var probe = app.openDoc(filepath + filename + suffix);
                  probe.closeDoc();
                  found = true;
              } catch (ignored) {
                  // Opening (or closing) failed: treat the name as unused.
                  found = false;
              }
              if (!found) {
                  return n;
              }
              n = n + 1;
          }
      }

       

      /**
      * Splits the document into consecutive sets of pageType pages and saves
      * each set as its own file below filepath.
      *
      * The current set always sits at the front of the document: after a set
      * has been saved its pages are deleted, so the next set starts at page 0
      * again (ExtractFromDocument reads its ID from there).
      *
      * NOTE(review): when no ID is found ExtractFromDocument yields null and
      * the file name then starts with "null" - confirm whether such sets
      * should rather be skipped or reported.
      */
      var pageAmount = this.numPages;
      for (var i = 0; i < pageAmount; i += pageType) {
          // Pages still left in the document, including the current set.
          var remainingPages = pageAmount - i;
          // Index of the last page of the current set; a short final set is clipped.
          var lastPage = Math.min(pageType, remainingPages) - 1;

          var filename = ExtractFromDocument(idNumber);
          var fileExistence = checkIfFileExists(filename, 0);
          var targetPath;
          if (fileExistence != 0) {
              targetPath = filepath+filename+"-000_"+fileExistence+".pdf";
          } else {
              targetPath = filepath+filename+"-000.pdf";
          }
          this.extractPages({nStart: 0, nEnd: lastPage, cPath: targetPath});

          // Per the Acrobat API a document must keep at least one page, so
          // the final set is left in place instead of being deleted.
          if (remainingPages > pageType) {
              this.deletePages({nStart: 0, nEnd: lastPage});
          }
      }

       

       

      thanks in advance :-)

       

      Michael