CSVFile

A CSVFile is instantiated by passing the content of a loaded CSV file into the constructor. The implementation assumes that the first line of the CSV contains column titles that correctly correspond to the included data.

CSVFile( file_content )
CSVFile.prototype.getValueFor
CSVFile.prototype.getValue
CSVFile.prototype.getNrOfRows

Generally, a CSVFile is loaded and then iterated through to retrieve values for some purpose. A common use is to load the values into an HTML table (see Load.Table).

var csv_file = new CSVFile( text );
var n        = csv_file.getNrOfRows();

for ( var i=0; i < n; i++ )
{
    var username = csv_file.getValueFor( i, ["First name", "Firstname", "Given name", "Given names"] );
    ...
}

CSVFile

CSVFile( file_content )

/**
 * Builds a CSVFile from the text of a CSV document.
 *
 * The instance starts with empty `headers` and `rows` arrays and then hands
 * the text to `parseContent`, which is responsible for filling them.
 *
 * @param {string} file_content - Raw text of the CSV file.
 */
function CSVFile( file_content )
{
	this.headers = [];
	this.rows    = [];

	this.parseContent( file_content );
}

getValueFor

getValueFor( row, labels )

Searches the specified 'row' for the first label in the 'labels' array whose column has a value. This allows CSV files that have inconsistent column titles, e.g., "Given name" or "Given names", to be searched.

/**
 * Returns the value stored in `row` under the first label in `labels` whose
 * column actually holds a value.
 *
 * Labels are tried in order through `getValue`. A label is skipped when
 * `getValue` yields an empty string, `undefined` or `null`; the latter two
 * occur when a row has fewer fields than there are headers, and must not stop
 * the search because a later label may still have a value.
 *
 * @param {number}   row    - Zero-based index of the data row.
 * @param {string[]} labels - Candidate column titles, in order of preference.
 * @returns {string} The first non-empty value found, or "" when none of the
 *                   labels yields one.
 */
CSVFile.prototype.getValueFor = function( row, labels )
{
	var n = labels.length;

	for ( var i=0; i < n; i++ )
	{
		var candidate = this.getValue( row, labels[i] );

		// Missing cells (undefined/null) count as "no value", exactly like "".
		if ( undefined !== candidate && null !== candidate && "" !== candidate )
		{
			return candidate;
		}
	}

	return "";
};
/**
 * Returns the value stored in `row` under the column titled `label`.
 *
 * Header matching ignores case and surrounding whitespace; when several
 * headers match, the first one wins.
 *
 * @param {number} row   - Zero-based index of the data row.
 * @param {string} label - Column title to look up.
 * @returns {string} The cell value, or "" when the column does not exist, the
 *                   row index is out of range, or the row has no field at
 *                   that column.
 */
CSVFile.prototype.getValue = function( row, label )
{
	// Normalise the requested label once rather than on every iteration.
	var wanted = String( label ).toLowerCase().trim();
	var n      = this.headers.length;
	var column = -1;

	for ( var i=0; i < n; i++ )
	{
		if ( wanted === this.headers[i].toLowerCase().trim() )
		{
			column = i; break;
		}
	}

	if ( -1 === column ) return "";

	var cells = this.rows[row];

	// An out-of-range row must not throw; report it as "no value".
	if ( undefined === cells || null === cells ) return "";

	var value = cells[column];

	// Rows shorter than the header line have no cell here.
	return ( undefined === value ) ? "" : value;
};
/**
 * Reports how many data rows are held in `this.rows`.
 *
 * @returns {number} Length of the `rows` array.
 */
CSVFile.prototype.getNrOfRows = function()
{
	var row_count = this.rows.length;

	return row_count;
};

Helper functions

/*
 *	Pattern for a line that ends with two comma-separated fields. Each field is
 *	either double-quoted (word characters, spaces and commas inside the quotes,
 *	optionally followed by one space) or unquoted (word characters and spaces,
 *	possibly empty). The comma may be followed by one optional space.
 *
 *	Not used by any active code visible in this file.
 */
CSVFile.prototype.CSVLineRE = new RegExp('("[\\w ,]+" ?|[\\w ]*), ?("[\\w ,]+" ?|[\\w ]*)$');

/**
 * Fills `this.headers` and `this.rows` from the text of a CSV file.
 *
 * The first line read becomes the header row; every following line is split
 * into fields and appended to `this.rows`, except lines whose fields are all
 * empty. Nothing is changed when the reader yields no first line.
 *
 * @param {string} file_content - Raw text of the CSV file.
 */
CSVFile.prototype.parseContent = function( file_content )
{
	var reader      = new CSVFile.LineReader( file_content, 100000 );
	var header_line = reader.readLine();

	if ( false === header_line ) return;

	// Trimming removes stray CRs written by Mac apps such as Numbers.
	// (A header-format check against CSVLineRE is intentionally not applied here.)
	this.headers = CSVFile.SplitAndTrim( header_line.trim() );

	for ( var line = reader.readLine(); false !== line; line = reader.readLine() )
	{
		var fields = CSVFile.SplitAndTrim( line );
		var joined = fields.join( "" );

		// Skip rows in which every field is empty.
		if ( "" !== joined )
		{
			this.rows.push( fields );
		}
	}
};

/**
 * Splits one CSV line into trimmed field values.
 *
 * The line is split on commas that lie outside double quotes (via
 * `CSVFile.SplitWhileRespectingQuotes`). Each field is trimmed, has its
 * enclosing double quotes removed, and is trimmed again. Inside a fully
 * quoted field a doubled quote (`""`) is turned back into a single `"`.
 *
 * A byte-order mark at the very start of the line is removed, both as the raw
 * U+FEFF character and as the `&#xfeff;` entity, so it cannot end up glued to
 * the first field.
 *
 * @param {string} line - A single line of CSV text.
 * @returns {string[]} The field values, in column order.
 */
CSVFile.SplitAndTrim = function( line )
{
	var cleaned = line.replace( /^(?:\uFEFF|&#xfeff;)/i, '' );
	var bits    = CSVFile.SplitWhileRespectingQuotes( cleaned, "," );
	var array   = [];
	var n       = bits.length;

	for ( var i=0; i < n; i++ )
	{
		var field = bits[i].trim();
		var last  = field.length - 1;

		if ( (last >= 1) && ('"' === field.charAt( 0 )) && ('"' === field.charAt( last )) )
		{
			// Fully quoted field: drop the enclosing quotes, then unescape "" -> ".
			field = field.substring( 1, last ).replace( /""/g, '"' );
		}
		else
		if ( "" !== field )
		{
			// Unbalanced quoting: leniently drop a lone trailing and/or leading quote.
			if ( '"' === field.charAt( last ) ) field = field.substring( 0, last );
			if ( '"' === field.charAt( 0    ) ) field = field.substring( 1       );
		}

		array.push( field.trim() );
	}

	return array;
};

/**
 * Splits `line` at every `delimiter` character that is not inside double
 * quotes.
 *
 * Each `"` toggles the inside/outside-quotes state; the quote characters stay
 * in the returned pieces. Text after the last split point is appended only
 * when it is non-empty, so a trailing delimiter does not produce a final empty
 * element and an empty line yields an empty array.
 *
 * @param {string} line      - Text to split.
 * @param {string} delimiter - Single character to split on.
 * @returns {string[]} The pieces of `line`, in order.
 */
CSVFile.SplitWhileRespectingQuotes = function( line, delimiter )
{
	var pieces        = [];
	var inside_quotes = false;
	var piece_start   = 0;
	var length        = line.length;

	for ( var pos = 0; pos < length; pos++ )
	{
		var ch = line.charAt( pos );

		if ( '"' === ch )
		{
			inside_quotes = !inside_quotes;
		}
		else
		if ( (delimiter === ch) && !inside_quotes )
		{
			pieces.push( line.substring( piece_start, pos ) );
			piece_start = pos + 1;
		}
	}

	if ( piece_start < length )
	{
		pieces.push( line.substring( piece_start, length ) );
	}

	return pieces;
};

/**
 * Reads CSV text one logical line at a time.
 *
 * The text is first passed through `CSVFileLineReaderUnicodeStrip`; the
 * result is what `readLine` walks over.
 *
 * @param {string} file_content - Raw text of the CSV file.
 * @param {number} limit        - Upper bound on `readLine` calls. The call
 *                                counter is incremented before it is compared
 *                                with `limit`, so at most `limit - 1` calls
 *                                return a line.
 */
CSVFile.LineReader = function( file_content, limit )
{
	this.content = CSVFileLineReaderUnicodeStrip( file_content );
	this.pos     = 0;
	this.lines   = 0;
	this.limit   = limit;
};

/**
 * Returns the next logical line, without its line terminator.
 *
 * A line ends at "\n", "\r" or "\r\n" found outside double quotes; line
 * breaks inside a quoted field are kept as part of the line. Characters whose
 * code is above 127 are skipped.
 *
 * @returns {string|false} The line text, or `false` when the content is
 *                         exhausted or the call limit has been reached.
 */
CSVFile.LineReader.prototype.readLine = function()
{
	if ( !( ++this.lines < this.limit ) )        return false;
	if ( !( this.pos < this.content.length ) )   return false;

	var line     = "";
	var in_quote = false;

	while ( this.pos < this.content.length )
	{
		var ch = this.content.charAt( this.pos++ );

		// Compare the character CODE: comparing the one-character string itself
		// with 127 coerces it to a number and never matches a non-ASCII character.
		if ( 127 < ch.charCodeAt( 0 ) )
		{
			continue;
		}

		if ( '"' === ch )
		{
			in_quote = !in_quote;
			line += ch;
			continue;
		}

		var is_cr = ( '\r' === ch );

		if ( is_cr || ('\n' === ch) )
		{
			if ( in_quote )
			{
				line += ch;
				continue;
			}

			// Swallow the "\n" of a "\r\n" pair so it does not yield an empty line.
			if ( is_cr && ('\n' === this.content.charAt( this.pos )) )
			{
				this.pos++;
			}
			break;
		}

		line += ch;
	}

	return line;
};

// Exposes the Unicode-stripping function under the name `unidecode`.
CSVFile.LineReader.prototype.unidecode = CSVFileLineReaderUnicodeStrip;

/**
 * Reduces `content` to characters whose code has bit 7 clear, plus HTML
 * entities.
 *
 * The text is first passed through `CSVFile.LineReader.prototype.unirecode`.
 * It is then scanned by character code, treating each code as a UTF-8 byte:
 *
 *   - a lead byte of a 6-, 5-, 4- or 2-byte sequence: the whole sequence is
 *     dropped;
 *   - a lead byte of a 3-byte sequence: the three characters are decoded with
 *     `UTF8Codepoint` and replaced by the entity from `CodepointToEntity`;
 *   - any other code with bit 7 set (a stray continuation byte, or a lead byte
 *     too close to the end of the text): a warning is logged and the
 *     character is dropped;
 *   - everything else is copied through unchanged.
 *
 * A lead byte is only honoured when the full sequence fits in the remaining
 * text; otherwise it falls through to the shorter cases.
 *
 * NOTE(review): 2-byte sequences are dropped while 3-byte sequences become
 * entities -- confirm whether that asymmetry is intended.
 *
 * @param {string} content - Text to clean; presumably a string in which each
 *                           character holds one byte -- confirm with callers.
 * @returns {string} The cleaned text.
 */
function CSVFileLineReaderUnicodeStrip( content )
{
	var text = CSVFile.LineReader.prototype.unirecode( content );

	var str = "";
	var n   = text.length;
	var i   = 0;

	while ( i < n )
	{
		var ch = text.charCodeAt(i);

		if ( (ch == (0xFC | ch)) && (i+5 < n) )	// 6 byte unicode
		{
			i += 6;
		}
		else
		if ( (ch == (0xF8 | ch)) && (i+4 < n) )	// 5 byte unicode
		{
			i += 5;
		}
		else
		if ( (ch == (0xF0 | ch)) && (i+3 < n) )	// 4 byte unicode
		{
			i += 4;
		}
		else
		if ( (ch == (0xE0 | ch)) && (i+2 < n) )	// 3 byte unicode
		{
			var codepoint = UTF8Codepoint( text.substring( i, i + 3 ) );

			str += CodepointToEntity( codepoint );

			i += 3;
		}
		else
		if ( (ch == (0xC0 | ch)) && (i+1 < n) )	// 2 byte unicode
		{
			i += 2;
		}
		else
		if ( (ch == (0x80 | ch)) )	            // extra byte
		{
			console.log( "Warning detected invalid unicode" );
			i += 1;
		}
		else
		{
			str += text[i];		         	// ascii character
			i++;
		}
	}
	return str;
}


// Exposes the decoder for literal \u escape sequences as a LineReader method;
// CSVFileLineReaderUnicodeStrip invokes it through this name.
CSVFile.LineReader.prototype.unirecode = CSVFileLineReaderUnicodeRecode;

/*
 *	Based on:
 *	https://stackoverflow.com/questions/17267329/converting-unicode-character-to-string-format
 */

/**
 * Replaces every literal `\uXXXX` escape (backslash, "u", four hex digits, in
 * either case) found in `content` with the character it denotes.
 *
 * Exactly four hex digits are consumed per escape; matching fewer would leave
 * the last digit behind as ordinary text and decode the wrong character.
 *
 * @param {string} content - Text that may contain literal \uXXXX escapes.
 * @returns {string} The text with each escape replaced via `Unirecode`.
 */
function CSVFileLineReaderUnicodeRecode( content )
{
	return content.replace( /\\u[\dA-F]{4}/gi, Unirecode );
}

/**
 * Turns one matched escape such as `\u0041` into the character it denotes.
 *
 * Every `\u` in `match` is removed, the remainder is parsed as a hexadecimal
 * number, and the character with that code is returned.
 *
 * @param {string} match - Text of the escape, e.g. as handed over by
 *                         `String.prototype.replace`.
 * @returns {string} The single character for the parsed code.
 */
function Unirecode( match )
{
	var hex_digits = match.replace(/\\u/g, '');
	var char_code  = parseInt( hex_digits, 16 );

	return String.fromCharCode( char_code );
}

/**
 * Builds a hexadecimal HTML entity from the characters of `content` in the
 * half-open range [start, end).
 *
 * The character code at each position is written in hexadecimal (without
 * zero padding) and the pieces are concatenated between "&#x" and ";".
 *
 * @param {string} content - Source text.
 * @param {number} start   - Index of the first character to include.
 * @param {number} end     - Index just past the last character to include.
 * @returns {string} The assembled entity text.
 */
function MyUnicode2HTML( content, start, end )
{
	var hex_parts = [];

	for ( var pos = start; pos < end; pos++ )
	{
		hex_parts.push( content.charCodeAt( pos ).toString( 16 ) );
	}

	return "&#x" + hex_parts.join( "" ) + ";";
}

/**
 * Decodes the first value from `str`, reading each character code as one
 * UTF-8 byte.
 *
 * Codes below 128 are returned as they are. Any other code starts (or
 * continues) a sequence: a first code below 224 expects 2 codes in total, any
 * other first code expects 3. Once the expected number of codes has been
 * collected they are combined into one number using the 2- or 3-byte UTF-8
 * bit layout. Continuation codes are not validated.
 *
 * Only the first decoded value is of interest, so the function returns as
 * soon as it has one.
 *
 * @param {string} str - Text whose character codes are treated as bytes.
 * @returns {number|undefined} The first decoded value, or `undefined` when
 *                             `str` is empty or ends inside a sequence.
 */
function utf8_to_unicode( str )
{
	var pending = [];
	var needed  = 0;

	for ( var pos = 0; pos < str.length; pos++ )
	{
		var code = str.charCodeAt( pos );

		// A plain 7-bit code is a complete value on its own.
		if ( code < 128 ) return code;

		if ( 0 === pending.length )
		{
			needed = (code < 224) ? 2 : 3;
		}

		pending.push( code );

		if ( pending.length === needed )
		{
			if ( 3 === needed )
			{
				return ((pending[0] % 16) * 4096)
				     + ((pending[1] % 64) *   64)
				     + ((pending[2] % 64) *    1);
			}

			return ((pending[0] % 32) * 64)
			     + ((pending[1] % 64) *  1);
		}
	}

	return undefined;
}

/**
 * Decodes `utf8` as a single value, reading each character code as one UTF-8
 * byte.
 *
 * A first code below 128 is returned directly. Otherwise a first code below
 * 224 expects a 2-code sequence and any other first code a 3-code sequence;
 * the codes are combined using the matching UTF-8 bit layout only when the
 * number of collected codes equals the expected count.
 *
 * @param {string} utf8 - The characters making up one encoded sequence.
 * @returns {number} The decoded value, or 0 when the number of codes does not
 *                   match the expected sequence length.
 */
function UTF8Codepoint( utf8 )
{
	var lead = utf8.charCodeAt( 0 );

	if ( lead < 128 ) return lead;

	var expected = (lead < 224) ? 2 : 3;
	var codes    = [ lead ];

	for ( var pos = 1; pos < utf8.length; pos++ )
	{
		codes.push( utf8.charCodeAt( pos ) );
	}

	// Anything other than an exact-length sequence is reported as 0.
	if ( codes.length !== expected ) return 0;

	if ( 2 === expected )
	{
		return ((codes[0] % 32) * 64)
		     + ((codes[1] % 64) *  1);
	}

	return ((codes[0] % 16) * 4096)
	     + ((codes[1] % 64) *   64)
	     + ((codes[2] % 64) *    1);
}

/**
 * Formats a number as a hexadecimal HTML character reference.
 *
 * @param {number} codepoint - Value to format.
 * @returns {string} "&#x", the value in lowercase hexadecimal, then ";".
 */
function CodepointToEntity( codepoint )
{
	var hex = codepoint.toString( 16 );

	return [ "&#x", hex, ";" ].join( "" );
}