2 Replies Latest reply on Jul 27, 2011 8:07 AM by Bernd Alheit

    My phone extractor script does't extract duplicate numbers

    fushyuma

      Can someboy help me tweak this script? It is working quite well but i noticed that it extracts only 1 number for multiple occurence. i need it to extract all the numbers even if it appears multiple times.

       

      here's the script

       

      /* Extract Phone numbers From the Document */
      // This script will scan all pages of the input document
      // and extract valid Phone numbers into new PDF document.
      // Output PDF document will be placed in the same folder
      // as input. The name of the output document will be:
      // Original filename + "_Extracted_Phone"

       

      // Number format
      var rePhone=/((\(\d{3}\) ?)|(\d{3}-))?\d{3}-\d{4} /g;

       

      var strExt = "_Extracted_Phone.pdf";
      var strIntro = "Phone numbers extracted from document: ";
      var strFinal = "Total number of Phone numbers extracted: " ;

       

      ExtractFromDocument(rePhone,strExt,strIntro,strFinal);

       

      function ExtractFromDocument(rePhone, strFileExt, strMessage1, strMessage2)
      {
      var chWord, numWords;

       

      // construct filename for output document
      var filename = this.path.replace(/\.pdf$/, strFileExt);

       

      // create a report document
      try {
          var ReportDoc = new Report();
          var Out = new Object(); // array where we will collect all our emails before outputing them
         
          ReportDoc.writeText(strMessage1 + this.path);
          ReportDoc.divide(1);      // draw a horizontal divider
          ReportDoc.writeText(" "); // write a blank line to output
         
          for (var i = 0; i < this.numPages; i++)
          {
              numWords = this.getPageNumWords(i);
              var PageText = "";
              for (var j = 0; j < numWords; j++) {
                  var word = this.getPageNthWord(i,j,false);
                  PageText += word;
                  }
         
              var strMatches = PageText.match(rePhone);
              if (strMatches == null) continue;
              // now output matches into report document
              for (j = 0; j < strMatches.length; j++) {
                  Out[strMatches[j]] = true; // store email as a property name
                  }
          }
         
          var nTotal = 0;
          for (var prop in Out)
          {
              ReportDoc.writeText(prop);
              nTotal++;
          }
         
          ReportDoc.writeText(" "); // output extra blank line
          ReportDoc.divide(1); // draw a horizontal divider
          ReportDoc.writeText(strMessage2 + nTotal);
         
          // save report to a document
          ReportDoc.save(
              {
              cDIPath: filename
              });

       

      }
      catch(e)
      {
      app.alert("Processing error: "+e)
      }
         
      } // end of the function

       

      Thanks in advance