Hey folks. I always cringe at importing CSV files from some outside source. Typically I cry about it for 15 seconds and then open the file in Excel and save it as XML or some other format that I trust, just so I don't have to deal with commas, quotes, and carriage returns. Of course, today I had a situation where I couldn't just do that (silly clients), so I broke down and used those 15 seconds to whip up a CSV parser. While there is no real "standard" for CSV documents, this parser will handle embedded characters such as strings with quotes, commas, and returns inside of them; rows that end with carriage returns, new lines, or a combination of the two; and it even gives you the choice of stripping the whitespace off the fields or not. I was going to do this with regexes, but then I remembered how terrible I am with them (also, this ends up being faster than some good regexes that other folks suggested). The code is below and, if you are interested, I based my CSV rules on the ones found here:
http://en.wikipedia.org/wiki/Comma-separated_values
Code:
/********************************************************************************
@method: parseCSV(sData, bTrim)
@arg sData : [string] The csv data you want to parse
@opt bTrim : [boolean] optional argument used if you want to trim the white spaces off the fields
@return aParsedData : [array] multi-dimensional array of rows and columns
@description : Takes a string and applies csv rules to it to form an array of rows and columns.
As there aren't hard standards for csv I applied the rules from the wiki http://en.wikipedia.org/wiki/Comma-separated_values
@note:
@history: 01/07/2010 MSF Created parseCSV method
********************************************************************************/
var sData;
var bTrim = false;
var aParsedData;
var aRow;
var aSearch;
var bInQuotes;
var bSpecial;
var sColumn;
// If no string to parse was passed there is no reason to run this method so return
if (!arguments[0])
{
return;
}
sData = arguments[0];
// If column data should be trimmed set the marker
if (arguments[1])
{
bTrim = true;
}
// Set the default character search array made up of the special characters that csv has to deal with
aSearch = ['"', ",", "\n", "\r"];
// Initialize the return array, the first column, and the first row array
aParsedData = new Array();
sColumn = "";
aRow = new Array();
// Loop through the characters in the string
for (var i = 0; i < sData.length; i++)
{
// Reset the special character marker to false
bSpecial = false;
// Loop through the current special character search array
for (var s = 0; s < aSearch.length; s++)
{
// And if we hit a speical character note that
if ( sData[i] == aSearch[s])
{
bSpecial = true;
break;
}
}
// If this is a special character that we are currently looking for it process it separately
if ( bSpecial )
{
// Switch on the current character (we can only get to commas, carriage returns, and line feeds if we aren't in double-quotes which is take care of by the aSearch array)
switch (sData[i])
{
// If the character is a comma then we know this is the end of a column of data so push it to the row and reinitialize the column string
case ",":
if (bTrim)
{
sColumn = utils.stringTrim(sColumn);
}
aRow.push(sColumn);
sColumn = "";
break;
// If this is a carriage return then it is the end of a row so push the column to the row and then push the row to the ParsedData array
case "\r":
if (bTrim)
{
sColumn = utils.stringTrim(sColumn);
}
aRow.push(sColumn);
aParsedData.push(aRow);
aRow = new Array();
sColumn = "";
// If the next character happens to be a new line then skip it because this is a csv whose rows end with \r\n
if ( sData[i+1] && "\n" == sData[i+1] )
{
i++;
}
break;
// If this is a new line then it is the end of a row so push the column to the row and then push the row to the ParsedData array
case "\n":
if (bTrim)
{
sColumn = utils.stringTrim(sColumn);
}
aRow.push(sColumn);
aParsedData.push(aRow);
aRow = new Array();
sColumn = "";
break;
// Quotes are highly special, read on
case '"':
// Check to see if we are already inside a set of double-quotes
if ( bInQuotes )
{
// If we are inside double-quotes, are currently on a quote, AND the next character is a quote it means we have found an escaped quote that we want to allow
// So add a single quote to the column and then skip the next character by incrementing i
if (sData[i+1] && '"' == sData[i+1])
{
sColumn += '"';
i++;
}
// If we are inside quotes but the next character is not another set of double quotes it means we have reached the quoted data so change the Search array back to our normal set and move on
else
{
aSearch = ['"', ",", "\n", "\r"];
bInQuotes = false;
}
}
// If we are not inside a set of double-quotes, this is out first set and means that the next special character that we care about is out closing double-quotes
else
{
bInQuotes = true;
aSearch = ['"'];
}
break;
}
}
// If this is NOT a special character just add it to the current column's data
else
{
sColumn += sData[i];
}
}
// Finalize the data by adding the last column and row to our return data
if (bTrim)
{
sColumn = utils.stringTrim(sColumn);
}
aRow.push(sColumn);
aParsedData.push(aRow);
// Enjoy your data
return aParsedData;