CSVFile
A CSVFile object is instantiated by passing the content of a loaded CSV file into the constructor. The implementation assumes that the first line of the CSV contains column titles that correctly correspond to the included data.
CSVFile( file_content ) CSVFile.prototype.getValueFor CSVFile.prototype.getValue CSVFile.prototype.getNrOfRows
Generally, a CSVFile is loaded and then iterated through to retrieve values for some purpose. A common use is to load the values into an HTML table (see Load.Table).
var csv_file = new CSVFile( text ); var n = csv_file.getNrOfRows(); for ( var i=0; i < n; i++ ) { var username = csv_file.getValueFor( i, ["First name", "Firstname", "Given name", "Given names"] ); ... }
CSVFile
CSVFile( file_content )
/**
 * Builds a CSVFile from the text of a CSV file.
 *
 * The header and row stores start out empty and are then filled in by
 * parseContent(), which is called with the supplied text.
 *
 * @param {string} file_content - Text of the CSV file to parse.
 */
function CSVFile( file_content )
{
    this.headers = [];
    this.rows    = [];

    this.parseContent( file_content );
}
getValueFor
getValueFor( row, labels )
Searches the specified 'row' for the first label in the 'labels' array whose column holds a value. This allows CSV files that have inconsistent column titles, e.g., "Given name" or "Given names", to be searched.
/**
 * Returns the first non-empty value found in 'row' under any of the given
 * column labels, trying the labels in order.
 *
 * A label whose lookup yields "", undefined or null is treated as "no value"
 * and the next label is tried. (Previously an undefined cell - a row shorter
 * than the header line - ended the search early.)
 *
 * @param {number}   row    - Index of the row to read.
 * @param {string[]} labels - Candidate column titles, in order of preference.
 * @returns {string} The first value found, or "" when no label yields one.
 */
CSVFile.prototype.getValueFor = function( row, labels )
{
    var n = labels.length;

    for ( var i = 0; i < n; i++ )
    {
        var candidate = this.getValue( row, labels[i] );

        // Loose comparisons on purpose: undefined/null and "" all mean "nothing here".
        if ( null == candidate || "" == candidate ) continue;

        return candidate;
    }

    return "";
};
/**
 * Returns the value stored in 'row' under the column titled 'label'.
 *
 * Column titles are compared case-insensitively and with surrounding
 * whitespace ignored; the first matching header wins.
 *
 * @param {number} row   - Index into this.rows.
 * @param {string} label - Column title to look up.
 * @returns {string} The cell value, or "" when the column is unknown, the row
 *                   does not exist, or the row has no cell for that column.
 */
CSVFile.prototype.getValue = function( row, label )
{
    var n = this.headers.length;
    if ( 0 == n ) return "";

    // Normalise the requested label once instead of once per header.
    var wanted = label.toLowerCase().trim();
    var column = -1;

    for ( var i = 0; i < n; i++ )
    {
        if ( wanted == this.headers[i].toLowerCase().trim() )
        {
            column = i;
            break;
        }
    }

    if ( -1 == column ) return "";

    // An out-of-range row index yields "" rather than a TypeError.
    var cells = this.rows[row];
    if ( undefined === cells || null === cells ) return "";

    // Rows may be shorter than the header line; report a missing cell as "".
    var value = cells[column];
    return ( undefined === value ) ? "" : value;
};
/**
 * Reports how many data rows are currently stored in this.rows.
 *
 * @returns {number} Length of the rows array.
 */
CSVFile.prototype.getNrOfRows = function()
{
    var stored_rows = this.rows;

    return stored_rows.length;
};
Helper functions
/*
 * Pattern matching text that ends in two comma-separated fields, where each
 * field is either a double-quoted run of word characters, spaces and commas
 * (optionally followed by one space) or an unquoted run of word characters
 * and spaces. The pattern is anchored at the end of the text only.
 */
CSVFile.prototype.CSVLineRE = ( function()
{
    var field = '("[\\w ,]+" ?|[\\w ]*)';

    return new RegExp( field + ', ?' + field + '$' );
} )();
/**
 * Splits the file text into a header row and data rows.
 *
 * The first line read becomes this.headers; every following line whose fields
 * are not all empty is appended to this.rows. Nothing is changed when the
 * reader yields no first line. The header is not validated against CSVLineRE
 * (that check is disabled).
 *
 * @param {string} file_content - Text of the CSV file.
 */
CSVFile.prototype.parseContent = function( file_content )
{
    var reader      = new CSVFile.LineReader( file_content, 100000 );
    var header_line = reader.readLine();

    if ( false === header_line ) return;

    // Trimming also removes a stray CR left by Mac apps such as Numbers.
    this.headers = CSVFile.SplitAndTrim( header_line.trim() );

    for ( var next = reader.readLine(); false !== next; next = reader.readLine() )
    {
        var cells = CSVFile.SplitAndTrim( next );

        // Skip lines that carry no data at all.
        if ( "" == cells.join( "" ) ) continue;

        this.rows.push( cells );
    }
};
/**
 * Splits one CSV line into trimmed, unquoted field values.
 *
 * The line is split on commas that lie outside double quotes. Each field is
 * trimmed, has one leading and/or one trailing double quote removed, and is
 * trimmed again. For a field wrapped in quotes on both sides, doubled quotes
 * inside it are reduced to a single quote (the CSV escape for a literal
 * quote).
 *
 * @param {string} line - One line of CSV text.
 * @returns {string[]} The field values, in column order.
 */
CSVFile.SplitAndTrim = function( line )
{
    // Drop a leading byte-order mark, given either as the raw U+FEFF character
    // or as its hexadecimal character reference (\x26 is the ampersand).
    var text = line.replace( /^(?:\uFEFF|\x26#xfeff;)/i, '' );

    var fields = [];
    var bits   = CSVFile.SplitWhileRespectingQuotes( text, "," );
    var n      = bits.length;

    for ( var i = 0; i < n; i++ )
    {
        var field = bits[i].trim();

        if ( "" != field )
        {
            var last    = field.length - 1;
            var wrapped = ( 0 < last )
                       && ( '"' == field.charAt( 0 ) )
                       && ( '"' == field.charAt( last ) );

            if ( '"' == field.charAt( last ) ) field = field.substring( 0, last );
            if ( '"' == field.charAt( 0 )    ) field = field.substring( 1 );

            // Inside a quoted field a literal quote is written as two quotes.
            if ( wrapped ) field = field.replace( /""/g, '"' );

            field = field.trim();
        }

        fields.push( field );
    }

    return fields;
};
/**
 * Splits 'line' on 'delimiter', ignoring delimiters that sit between double
 * quotes. Quote characters are kept in the returned pieces.
 *
 * Every non-empty line yields one more piece than it has unquoted delimiters,
 * so a line ending in a delimiter ends with an empty piece ("a,b," gives
 * three pieces). An empty line yields an empty array.
 *
 * @param {string} line      - Text to split.
 * @param {string} delimiter - Single-character separator.
 * @returns {string[]} The pieces, in order.
 */
CSVFile.SplitWhileRespectingQuotes = function( line, delimiter )
{
    var pieces  = [];
    var outside = true;   // true while not inside a quoted section
    var start   = 0;      // index where the current piece begins
    var n       = line.length;

    for ( var i = 0; i < n; i++ )
    {
        var ch = line.charAt( i );

        if ( '"' === ch )
        {
            outside = !outside;
        }
        else if ( delimiter === ch && outside )
        {
            pieces.push( line.substring( start, i ) );
            start = i + 1;
        }
    }

    // Always emit the final piece, even when it is empty, so that a trailing
    // delimiter is not silently dropped.
    if ( 0 < n )
    {
        pieces.push( line.substring( start, n ) );
    }

    return pieces;
};
/**
 * Line-by-line reader over CSV text.
 *
 * The text is passed through CSVFileLineReaderUnicodeStrip() before being
 * stored; the read position and the line counter both start at zero.
 *
 * @param {string} file_content - Text to read from.
 * @param {number} limit        - Bound on the line counter used by readLine().
 */
CSVFile.LineReader = function( file_content, limit )
{
    var stripped = CSVFileLineReaderUnicodeStrip( file_content );

    this.content = stripped;
    this.pos     = 0;
    this.lines   = 0;
    this.limit   = limit;
};
/**
 * Reads the next line from this.content, starting at this.pos.
 *
 * A line ends at an LF, a CR, or a CR LF pair that lies outside double
 * quotes; the terminator is consumed but not returned. Line breaks inside a
 * quoted section are kept as part of the line, as are the quote characters
 * themselves. The line counter is incremented on every call, including calls
 * that return false.
 *
 * @returns {string|false} The line text, or false when the line counter has
 *                         reached the limit or no content is left.
 */
CSVFile.LineReader.prototype.readLine = function()
{
    // Written as a negated "<" so that an undefined limit still yields false.
    if ( !( ++this.lines < this.limit ) ) return false;

    var content = this.content;
    var n       = content.length;

    if ( !( this.pos < n ) ) return false;

    var line     = "";
    var in_quote = false;

    // Characters are copied as-is. An earlier `127 < ch` guard compared a
    // number with a one-character string, which is never true, so it was
    // removed without changing behaviour.
    while ( this.pos < n )
    {
        var ch = content.charAt( this.pos++ );

        if ( '"' == ch )
        {
            in_quote = !in_quote;
            line += ch;
        }
        else if ( in_quote && ( '\n' == ch || '\r' == ch ) )
        {
            line += ch;                       // line break inside a quoted field
        }
        else if ( '\n' == ch )
        {
            break;
        }
        else if ( '\r' == ch )
        {
            // Swallow the LF of a CR LF pair.
            if ( '\n' == content.charAt( this.pos ) ) this.pos++;
            break;
        }
        else
        {
            line += ch;
        }
    }

    return line;
};
// Make CSVFileLineReaderUnicodeStrip available on LineReader instances as "unidecode".
CSVFile.LineReader.prototype.unidecode = CSVFileLineReaderUnicodeStrip;
/**
 * Reduces 'content' to plain characters plus entity strings.
 *
 * The text is first passed through LineReader.prototype.unirecode(). It is
 * then scanned one character at a time, with each character code tested
 * against UTF-8 lead-byte bit patterns.
 * NOTE(review): this presumably expects a byte string (one character per
 * UTF-8 byte) - confirm against the caller.
 *
 *  - 6-, 5-, 4- and 2-character sequences are skipped;
 *  - 3-character sequences are replaced by
 *    CodepointToEntity( UTF8Codepoint( sequence ) );
 *  - a character with bit 7 set that starts no complete sequence is skipped
 *    after logging a warning;
 *  - every other character is copied unchanged.
 *
 * A sequence is recognised only when enough characters remain for it.
 *
 * @param {string} content - Text to process.
 * @returns {string} The processed text.
 */
function CSVFileLineReaderUnicodeStrip( content )
{
    content = CSVFile.LineReader.prototype.unirecode( content );

    var str = "";
    var n   = content.length;
    var i   = 0;

    while ( i < n )
    {
        var ch = content.charCodeAt( i );

        if ( ( ( ch & 0xFC ) == 0xFC ) && ( i + 5 < n ) )        // 6 byte unicode
        {
            i += 6;
        }
        else if ( ( ( ch & 0xF8 ) == 0xF8 ) && ( i + 4 < n ) )   // 5 byte unicode
        {
            i += 5;
        }
        else if ( ( ( ch & 0xF0 ) == 0xF0 ) && ( i + 3 < n ) )   // 4 byte unicode
        {
            i += 4;
        }
        else if ( ( ( ch & 0xE0 ) == 0xE0 ) && ( i + 2 < n ) )   // 3 byte unicode
        {
            var codepoint = UTF8Codepoint( content.substring( i, i + 3 ) );

            str += CodepointToEntity( codepoint );
            i += 3;
        }
        else if ( ( ( ch & 0xC0 ) == 0xC0 ) && ( i + 1 < n ) )   // 2 byte unicode
        {
            i += 2;
        }
        else if ( ( ch & 0x80 ) == 0x80 )                        // extra byte
        {
            console.log( "Warning detected invalid unicode" );
            i += 1;
        }
        else
        {
            str += content.charAt( i );                          // ascii character
            i++;
        }
    }

    return str;
}
// Make CSVFileLineReaderUnicodeRecode available on LineReader instances as "unirecode".
CSVFile.LineReader.prototype.unirecode = CSVFileLineReaderUnicodeRecode;
/*
* Based on:
* https://stackoverflow.com/questions/17267329/converting-unicode-character-to-string-format
*/
/**
 * Replaces textual escape sequences of the form backslash, "u", four
 * hexadecimal digits with the character they denote. Shorter sequences are
 * left untouched.
 *
 * (The pattern used to accept only three hex digits, which split a standard
 * four-digit escape into a wrong character plus a stray digit.)
 *
 * @param {string} content - Text that may contain such escape sequences.
 * @returns {string} The text with each escape replaced via Unirecode().
 */
function CSVFileLineReaderUnicodeRecode( content )
{
    return content.replace( /\\u[\dA-F]{4}/gi, Unirecode );
}
/**
 * Converts one matched escape sequence into a character.
 *
 * Every backslash-"u" marker is removed from 'match', the remainder is parsed
 * as a hexadecimal number, and the character with that code is returned.
 *
 * @param {string} match - The matched escape text.
 * @returns {string} A one-character string.
 */
function Unirecode( match )
{
    var hex_digits = match.replace( /\\u/g, '' );
    var code_unit  = parseInt( hex_digits, 16 );

    return String.fromCharCode( code_unit );
}
/**
 * Concatenates the hexadecimal character codes of content[start..end) and
 * appends a semicolon.
 *
 * @param {string} content - Text to read character codes from.
 * @param {number} start   - Index of the first character to include.
 * @param {number} end     - Index one past the last character to include.
 * @returns {string} The hex codes, unpadded and unseparated, followed by ";".
 */
function MyUnicode2HTML( content, start, end )
{
    var pieces = [];

    for ( var index = start; index < end; index++ )
    {
        pieces.push( content.charCodeAt( index ).toString( 16 ) );
    }

    pieces.push( ";" );

    return pieces.join( "" );
}
/**
 * Decodes the first value found in 'str', where each character is treated as
 * one byte of a UTF-8 sequence.
 *
 * A character code below 128 is returned as-is. Otherwise characters are
 * collected until a 2-character (lead code below 224) or 3-character sequence
 * is complete, and its combined value is returned.
 *
 * @param {string} str - Text to decode.
 * @returns {number|undefined} The first decoded value, or undefined when the
 *                             text is empty or ends inside a sequence.
 */
function utf8_to_unicode( str )
{
    var pending = [];   // character codes of the sequence being collected
    var needed  = 0;    // length of that sequence

    for ( var pos = 0, len = str.length; pos < len; pos++ )
    {
        var code = str.charCodeAt( pos );

        if ( code < 128 ) return code;

        if ( 0 == pending.length )
        {
            needed = ( code < 224 ) ? 2 : 3;
        }

        pending.push( code );

        if ( pending.length != needed ) continue;

        if ( 3 == needed )
        {
            return ( ( pending[0] % 16 ) * 4096 )
                 + ( ( pending[1] % 64 ) *   64 )
                 + ( ( pending[2] % 64 ) *    1 );
        }

        return ( ( pending[0] % 32 ) * 64 )
             + ( ( pending[1] % 64 ) *  1 );
    }

    return undefined;
}
/**
 * Computes the value encoded by 'utf8', where each character is treated as
 * one byte of a single UTF-8 sequence.
 *
 * A first character code below 128 is returned directly. Otherwise the text
 * must be exactly 2 characters long (lead code below 224) or exactly 3
 * characters long; any other length yields 0.
 *
 * @param {string} utf8 - The sequence to decode.
 * @returns {number} The decoded value, or 0 when the length does not fit.
 */
function UTF8Codepoint( utf8 )
{
    var lead = utf8.charCodeAt( 0 );

    if ( lead < 128 ) return lead;

    var expected = ( lead < 224 ) ? 2 : 3;

    if ( utf8.length != expected ) return 0;

    var second = utf8.charCodeAt( 1 ) % 64;

    if ( 2 == expected )
    {
        return ( ( lead % 32 ) * 64 ) + second;
    }

    var third = utf8.charCodeAt( 2 ) % 64;

    return ( ( lead % 16 ) * 4096 ) + ( second * 64 ) + third;
}
/**
 * Formats a code point as a hexadecimal numeric character reference:
 * an ampersand, "#x", the lowercase hex digits of the code point, and ";".
 *
 * Earlier output lacked the ampersand-"#x" prefix and therefore was not a
 * character reference at all.
 *
 * @param {number} codepoint - The code point to format.
 * @returns {string} The character reference text.
 */
function CodepointToEntity( codepoint )
{
    // The prefix is assembled from two pieces so that HTML-aware tooling
    // processing this source cannot mistake it for a character reference.
    var prefix = "&" + "#x";

    return prefix + codepoint.toString( 16 ) + ";";
}