• Global community
    • Language:
      • Deutsch
      • English
      • Español
      • Français
      • Português
  • 日本語コミュニティ
    Dedicated community for Japanese speakers
  • 한국 커뮤니티
    Dedicated community for Korean speakers
Exit
0

CSV parsing routine works fast in AI, slow in ID

Valorous Hero ,
Mar 01, 2017 Mar 01, 2017

Copy link to clipboard

Copied

I have a modified version of a csv parsing code from here: excel.js/csv.js at master · thetalecrafter/excel.js · GitHub      Javascript code to parse CSV data - Stack Overflow

I've always used it with Illustrator scripting and it worked instantly. However, using it inside ID takes an outrageous amount of time to do the same thing!

Try the following snippet using #target illustrator and then using #target indesign.

#target illustrator

//#target indesign

function test(){

  var grid = {

   parse: function(csv, reviver, delimiter) {

     delimiter = delimiter  || ',';

     reviver = reviver || function(r, c, v) { return v; };

     var chars = csv.split(''), c = 0, cc = chars.length, start, end, table = [], row;

     while (c < cc) {

       table.push(row = []);

       while (c < cc && '\r' !== chars && '\n' !== chars) {

         start = end = c;

         if ('"' === chars){

           start = end = ++c;

           while (c < cc) {

             if ('"' === chars) {

               if ('"' !== chars[c+1]) { break; }

               else { chars[++c] = ''; } /* unescape "" */

             }

             end = ++c;

           }

           if ('"' === chars) { ++c; }

           while (c < cc && '\r' !== chars && '\n' !== chars && delimiter !== chars) { ++c; }

         } else {

           while (c < cc && '\r' !== chars && '\n' !== chars && delimiter !== chars) { end = ++c; }

         }

         row.push(reviver(table.length-1, row.length, chars.slice(start, end).join('')));

         if (delimiter === chars) { ++c; }

       }

       if ('\r' === chars) { ++c; }

       if ('\n' === chars) { ++c; }

     }

     return table;

   },

   stringify: function(table, replacer, delimiter) {

     replacer = replacer || function(r, c, v) { return v; };

     var csv = '', c, cc, r, rr = table.length, cell;

     for (r = 0; r < rr; ++r) {

       if (r) { csv += '\r\n'; }

       for (c = 0, cc = table.length; c < cc; ++c) {

         if (c) { csv += delimiter; }

         cell = replacer(r, c, table);

         var rx = new RegExp("["+delimiter+"\\r"+"\\n\"]");

         if (rx.test(cell)) { cell = '"' + cell.replace(/"/g, '""') + '"'; }

         csv += (cell || 0 === cell) ? cell : '';

       }

     }

     return csv;

   }

  };

  var f = File.openDialog();

  if(f){

    f.open('r');

    var fileStr = f.read();

    f.close();

  var parsedFile = grid.parse(fileStr);

   var len = parsedFile.length;

alert("The CSV file has " + len + " records.");

  }

};

test();

TOPICS
Scripting

Views

2.0K

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines

correct answers 1 Correct answer

Guru , Feb 28, 2018 Feb 28, 2018

Ok Here's my attempt

You can download it from http://download.creative-scripts.com/InDesign_CSV_Parser

The test file I used was somewhat complicated you can download it from http://download.creative-scripts.com/InDesign_CSV_Parser_Test_File

APPInDesign_CSV_Parser.jsx (By Trevor)Andy's Original Script
ILLUSTRATOR5047ms (5 Seconds)3733ms (3.7 Seconds)
INDESIGN6227ms (6.3 Seconds)682689ms (11 minutes and 22.7 seconds)

So as you can see the original script is about a 1/3 quicker than my version when run

...

Votes

Translate

Translate
People's Champ ,
Mar 02, 2017 Mar 02, 2017

Copy link to clipboard

Copied

Hi Vassily,

Never noticed CSV processing was specifically long but I don't use that library of yours. My approach is rather:

var main = function() {

  var csv = getfCSVFile(),

        data;

   

    if !csv ) return;

   

    data = getCSVData ( csv );

   

    if ( !data.rows || !data.rows.length ) {

        alert("No data found sorry !");

        return;

    }

   

    alert( data.rows.length+ " rowws found and headers are\r"+data.headers.join("\r"));

}

var u;

var getfCSVFile = function ( ) {

    var

    os = $.os[0],

    mac = os=="M",

    wF = "CSV files : *.csv;",

    mF = function() {

        return (f instanceof Folder) || /\.csv/i.test(f.name) );

    },

    f = File.openDialog("Please pick a CSV file…", mac? mF : wF );

    return f;

}

var getCSVData = function ( csvFile ) {

    var data = {rows:[]]}, sep = ",";

   

    csvFile.open('r');

    data.headers = csvFile.readln ().split(sep);

    while ( !csvFile.eof ) {

        data.rows.push (csvFile.readln ().split(sep); )

    }

   

    return data;

}

app.doScript ( "main()",u,u,UndoModes.ENTIRE_SCRIPT, "The Script" );

But it will fails differenciating carriage returns from line feeds and it needs you to know the separator. Let me know if it's "speeder" than the  lib above.

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Valorous Hero ,
Mar 02, 2017 Mar 02, 2017

Copy link to clipboard

Copied

Yea, a simple parse like that works instantly in both AI and ID, but in my case they need to have commas inside their cells.

If you try my snippet, you'll see a strange and enormous time discrepancy between executing in AI and ID.

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Advisor ,
Mar 03, 2017 Mar 03, 2017

Copy link to clipboard

Copied

The JS engine in Indesign is the oldest one of the Adobe products AFIK, and it does have quite some inefficiencies when using regular expressions in a large string.

Some performance gains can be had in your code by moving the regex compilation outside of the loops in the stringify method, and using chunks in the parse function instead of going character by character.

Moving forward, I used with great results a modified version of GitHub - cparker15/CSV-js: A CSV (comma-separated values) parser written in JavaScript. And I heard great things about GitHub - knrz/CSV.js: A simple, blazing-fast CSV parser and encoder. Full RFC 4180 compliance.

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Valorous Hero ,
Mar 03, 2017 Mar 03, 2017

Copy link to clipboard

Copied

Interesting. I looked at the first link and saw there's a TODO regarding split lines inside of quotes, and the 2nd link has some code which has to do with higher versions of ES using the 'let' keyword.

Using the 1st method, were you able to get good results when there are line breaks inside of a cell?

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Valorous Hero ,
Feb 26, 2018 Feb 26, 2018

Copy link to clipboard

Copied

Coming back to this a year later - I tried to change both of these code resources to work with ES3 but to no avail. For some reason, all kinds of errors appear.

Andy's regexp-heavy parser is the only one which seems to do the job for me, but it still does take a long time in Indesign. So much so that I attempted to try a BridgeTalk approach and see it it would sneak the slowness over some other application and it would have been bypassed. This failed miserably.

Well, I wonder what I'm doing wrong: the 1st code that you Vamitul​ testify of your own usage, had first error being ".push.bind" being unavailable, then after I pasted in an MDN javascript polyfill to add the .bind prototype, another error came immediately after where some array didn't have all its things.

The next parser had that nested comma issue - (that's why I need the parser), but I decided to try it anyway. Of course, some variable had some error, so I didn't get very far.

How are you using this CSV parser in extendscript, Vamitul​ ?

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Guru ,
Feb 28, 2018 Feb 28, 2018

Copy link to clipboard

Copied

Ok Here's my attempt

You can download it from http://download.creative-scripts.com/InDesign_CSV_Parser

The test file I used was somewhat complicated you can download it from http://download.creative-scripts.com/InDesign_CSV_Parser_Test_File

APPInDesign_CSV_Parser.jsx (By Trevor)Andy's Original Script
ILLUSTRATOR5047ms (5 Seconds)3733ms (3.7 Seconds)
INDESIGN6227ms (6.3 Seconds)682689ms (11 minutes and 22.7 seconds)

So as you can see the original script is about a 1/3 quicker than my version when run on Illustrator and about 110 times slower than my version when run on InDesign.

My version uses markers if you wanted to make it more robust you could check the csv file for those markers first and replace them with others if needed.

I didn't test too much and didn't do any Googling so there's probably some better stuff out there.

HTH

Trevor

/*

InDesign_CSV_Parser.jsx A Basic CSV parser by by Trevor FOR INDESIGN

Beta 1 - 28 Feb 2018

___      __    __   __ __  __         __  __ __  __        __

| |__||(_   |(_   |_ /  \|__)  ||\ ||  \|_ (_ |/ _ |\ |  /  \|\ || \_/|||

| |  ||__)  |__)  |  \__/| \   || \||__/|____)|\__)| \|  \__/| \||__| ...

ON OTHER APPS THERE ARE QUICKER METHODS

NOT OPTIMIZED

http://creative-scripts.com Custom whatever-you-wants

Can download script file from http://download.creative-scripts.com/InDesign_CSV_Parser

Can download test csv file from http://download.creative-scripts.com/InDesign_CSV_Parser_Test_File

These are markers that are unlikely to appear in the csv if they do then the result will be messed up 😞

\uE130 Marker for "" (These are escaped quotes between quotes)

\uE131 Marker for \n

\uE132 Marker for \n inside cell (in between quotes)

\uE133 Marker for , inside cell (in between quotes)

\uE134 Marker for , separating cells (outside quotes)

One could have the script test the csv file to see if they exist and change if so

// jshint esversion:6, devel: true

*/

function csvToTable(csv) {

    if (!csv) { return; }

    var table, rows, rl;

    // Replace escaped quotes with marker

    table = csv.replace(/""/g, '\uE130');

    // Replace new Lines with marker

    table = table.replace(/\n/g, '\uE131');

    // Swap order of , / \n and escaped quotes when needed

    table = ('\uE131' + table).replace(/(\uE131|,)\uE130"/g, '$1"\uE130').substring(1);

    var quotedTextReg, find, quotedCell;

    quotedTextReg = /"[^"]+"/g;

    var tableBeforeFind, tableAfterFind, findIndex, lastIndex;

    // Find all quoted cells

    while (find = quotedTextReg.exec(table)) { // jshint ignore:line

        quotedCell = find[0];

        findIndex = find.index;

        lastIndex = quotedTextReg.lastIndex;

        tableBeforeFind = table.substring(0, findIndex);

        tableAfterFind = table.substring(lastIndex);

        // Replace new lines within a cell with marker

        // Replace commas within a cell with marker

        quotedCell = quotedCell.replace(/\uE131/g, '\uE132').replace(/,/g, '\uE133');

        table = tableBeforeFind + quotedCell + tableAfterFind;

    }

    rows = table.split(/\uE131/);

    rl = rows.length;

    while (rl--) {

        // Remove outer quotes and replace markers back to desired values

        // It's quicker to replace for each row one at a time than for the whole table at least with very large tables

        // Split to cells

        rows[rl] = rows[rl]

            .replace(/"/g, '')

            .replace(/\uE130/g, '"')

            .replace(/,/g, '\uE134')

            .replace(/\uE133/g, ',')

            .replace(/\uE132/g, '\n')

            .split(/\uE134/);

    }

    return rows;

}

var csv, csvFile, t;

csv = [ // Fallback csvFile in case there's none by the test

    '"""1","2,2","""3""","4,""4","""",6,",",8',

    '"9',

    '9","10',

    '',

    '10","',

    '11","""""12""""",,,,',

    '"13"""",13""",14,,,,,,',

    '"""15""",,,,,,,',

    ',,,,,,,',

    '"1,""2,2"",""""""3"""""",""4,""""4"","""""""",6,"","",8",,,,,,,'

].join('\n');

var csvFile = File.openDialog();

if (csvFile) {

    csvFile.encoding = 'UTF8';

    csvFile.open('r');

    var csv = csvFile.read();

    csvFile.close();

}

t = new Date();

var table = csvToTable(csv);

t = new Date() - t;

$.writeln('Took ' + t + 'ms to process ' + (table.length) + ' rows');

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines
Valorous Hero ,
Feb 28, 2018 Feb 28, 2018

Copy link to clipboard

Copied

LATEST

Much appreciated, I am sure to take advantage of this one during this year. I think for basic CSV parsing with quotes, this will be very useful in Indesign. As far as better stuff out there, once I test this and before that better stuff is posted here, this answer shall be marked as correct.

Votes

Translate

Translate

Report

Report
Community guidelines
Be kind and respectful, give credit to the original source of content, and search for duplicates before posting. Learn more
community guidelines