|
-
April 28th, 2012, 07:33 AM
#1
Is this class efficient enough?
I am working on a class library, and I have started with this one class called TextTraverser. It's purpose is to allow me to take either a StreamReader or StringReader and read through it back and forth, keeping track of things like the current offset, line number, and column. Is is also supposed to make it very easy for me to do lexing, but that potential has not yet been fully implemented. All I want to know is, is this code efficient enough? I am worried, with all the converting of StringBuilder to a string, and the counting new lines for each change in position, that the code might be very slow when applied to a large amount of text. I am basically keeping track of all the characters that have already been read, to make it possible to jump back and forth through the text, and I've also implemented a function that is like Substring, only it allows me to get a string that is spread across the read characters and the unread characters. Well, you guys are way smarter than me so I'll just let you look at the code yourself. Hopefully I can get some good feedback on this here code.
Code:
/*
* Author: Guido Arbia
* TextTraverser.cs
*/
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
namespace Guido.Lexing
{
class TextTraverser
{
TextReader _textReader;
int _currentChar;
int _originChar;
StringBuilder _pastChars = new StringBuilder();
int _relativeIndex = 0;
int _absoluteIndex = 0;
TextTraverserRegion _region;
Stack<int> _marks = new Stack<int>();
bool _disableLineTracking = false;
int _currentLine = 0;
int _currentColumn = 0;
public TextTraverser(TextReader textReader)
{
_textReader = textReader;
_currentChar = _textReader.Peek();
if (_currentChar > -1)
_region = TextTraverserRegion.Within;
}
public void Mark()
{
_marks.Push(CurrentOffset);
}
public void ReleaseMark()
{
_marks.Pop();
}
public string Sweep()
{
int lastOffset = _marks.Pop();
int earlyOffset = Math.Min(lastOffset, CurrentOffset);
int lateOffset = Math.Max(lastOffset, CurrentOffset) - earlyOffset;
return ExtractRange(earlyOffset, lateOffset);
}
private string GetPastPartInRange(int start, int length)
{
if (start < _absoluteIndex)
{
int pastStart = _pastChars.Length - (_absoluteIndex - start);
int remainingPastLength = _pastChars.Length - pastStart;
int desiredPastLength = Math.Min(remainingPastLength, length);
if (pastStart < 0 || pastStart + desiredPastLength > _pastChars.Length)
throw new ArgumentOutOfRangeException("start");
return _pastChars.ToString().Substring(pastStart, desiredPastLength);
}
return String.Empty;
}
private string GetFuturePartInRange(int start, int length)
{
int originalOffset = CurrentOffset;
Seek(_absoluteIndex);
if (start + length > _absoluteIndex)
{
int futureStart = Math.Max(_absoluteIndex, start);
int remainingLength = (start + length) - futureStart;
Seek(futureStart);
Step(remainingLength - 1);
if (_currentChar == -1)
throw new ArgumentOutOfRangeException("length");
Step(1);
string futurePart = _pastChars.ToString().Substring(_pastChars.Length - remainingLength);
Seek(originalOffset);
return futurePart;
}
Seek(originalOffset);
return String.Empty;
}
public string ExtractRange(int start, int length)
{
string pastPart = GetPastPartInRange(start, length);
string futurePart = GetFuturePartInRange(start, length);
return pastPart + futurePart;
}
public void Seek(int offset)
{
Step(offset - CurrentOffset);
}
public void Step(int count = 1)
{
Mark();
if (_relativeIndex == 0)
_originChar = _currentChar;
_relativeIndex = _relativeIndex + count;
if (_relativeIndex < 0)
{
if (Math.Abs(_relativeIndex) <= _pastChars.Length)
{
_currentChar = _pastChars[_pastChars.Length + _relativeIndex];
_region = TextTraverserRegion.Within;
}
else
{
_currentChar = -1;
_region = TextTraverserRegion.BeforeFirst;
}
}
else if (_relativeIndex > 0)
{
ReadChars(_relativeIndex);
_relativeIndex = 0;
}
else if (_relativeIndex == 0)
{
_currentChar = _originChar;
if (_currentChar != -1)
_region = TextTraverserRegion.Within;
}
if (!_disableLineTracking)
DoLineTracking();
else
ReleaseMark();
}
private void WindBackToLastLineStart()
{
while (_currentChar != -1 && _currentChar != '\n')
Step(-1);
Step(1);
}
private bool IsMarkAfter()
{
return (_marks.Peek() < CurrentOffset);
}
private void DoLineTracking()
{
bool oldMode = _disableLineTracking;
_disableLineTracking = true;
bool forward = IsMarkAfter();
string sweptString = Sweep();
int lineCount = 0;
for (int i = 0; i < sweptString.Length; i++)
{
char currChar = sweptString[i];
char nextChar = (i + 1 < sweptString.Length)
? sweptString[i + 1]
: Convert.ToChar(0);
if ((currChar == '\r') && (nextChar == '\n'))
{
lineCount++;
i++;
}
else if ((currChar == '\n'))
lineCount++;
}
if (!forward)
lineCount = -lineCount;
_currentLine += lineCount;
int originalOffset = CurrentOffset;
WindBackToLastLineStart();
_currentColumn = originalOffset - CurrentOffset;
Seek(originalOffset);
_disableLineTracking = oldMode;
}
public int CurrentLine
{
get { return _currentLine; }
}
public int CurrentOffset
{
get { return _absoluteIndex + _relativeIndex; }
}
public int CurrentColumn
{
get { return _currentColumn; }
}
public TextTraverserRegion CurrentRegion
{
get { return _region; }
}
public char CurrentChar
{
get {return Convert.ToChar(_currentChar); }
}
private void ReadChars(int count)
{
char[] buffer = new char[count];
int charsRead = _textReader.Read(buffer, 0, count);
_pastChars.Append(buffer);
_currentChar = _textReader.Peek();
if (_currentChar == -1)
_region = TextTraverserRegion.AfterLast;
else
_region = TextTraverserRegion.Within;
_absoluteIndex += charsRead;
}
}
}
Last edited by Guidosoft; April 28th, 2012 at 07:39 AM.
-
April 28th, 2012, 10:42 AM
#2
Re: Is this class efficient enough?
 Originally Posted by Guidosoft
All I want to know is, is this code efficient enough? I am worried, with all the converting of StringBuilder to a string, and the counting new lines for each change in position, that the code might be very slow when applied to a large amount of text.
Given that the code is already written (and presumably working ), the best way to determine this is to run it on a large amount of text and see how fast it is.
Looking at code to try to find places where it can be optimized is pointless if it is already fast enough. And if it isn't fast enough, you should still run it, through a profiler, to see exactly which bits are taking the most time. Trying to guess this by looking at the code is generally not easy and you could end up optimizing parts which aren't the problem.
-
April 28th, 2012, 10:46 AM
#3
Re: Is this class efficient enough?
Best Regards,
BioPhysEngr
http://blog.biophysengr.net
--
All advice is offered in good faith only. You are ultimately responsible for effects of your programs and the integrity of the machines they run on.
-
April 28th, 2012, 05:54 PM
#4
Re: Is this class efficient enough?
Okay, thanks for the response. So, I don't have the full version of Visual Studio on my personal machine. I'm guessing that would have the tools I need for analyzing my code.
I did, however, find some problems with my code that was making it eternally slow when dealing with a 10000 line file. I can loop through that with StreamReader in less than a second. So, I realized that my code was creating numerous strings, one for each character in the text, on several occasions. So I had to use StringBuilder to optimize the code. Here's the code I have now but it is still way too slow. At least it manages to finish the file in an observable time.
Code:
/*
* Author: Guido Arbia
* TextTraverser.cs
*/
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
namespace Guido.Lexing
{
class TextTraverser
{
TextReader _textReader;
int _currentChar;
int _originChar;
StringBuilder _pastChars = new StringBuilder();
int _relativeIndex = 0;
int _absoluteIndex = 0;
TextTraverserRegion _region;
Stack<int> _marks = new Stack<int>();
bool _disableLineTracking = false;
int _currentLine = 0;
int _currentColumn = 0;
int _currentLineOffset = 0;
Queue<StringBuilder> usedBuffers = new Queue<StringBuilder>();
Stack<StringBuilder> claimedBuffers = new Stack<StringBuilder>();
Dictionary<int, int> _lineOffsets = new Dictionary<int, int>();
Dictionary<int, int> _offsetLines = new Dictionary<int, int>();
private StringBuilder ClaimBuffer()
{
if (usedBuffers.Count > 0)
claimedBuffers.Push(usedBuffers.Dequeue());
else
claimedBuffers.Push(new StringBuilder());
return claimedBuffers.Peek();
}
private void ReleaseBuffer()
{
usedBuffers.Enqueue(claimedBuffers.Pop());
}
private StringBuilder GetCurrentBuffer()
{
return claimedBuffers.Peek();
}
public TextTraverser(TextReader textReader)
{
_textReader = textReader;
_currentChar = _textReader.Peek();
if (_currentChar > -1)
{
_region = TextTraverserRegion.Within;
_lineOffsets[0] = 0;
_offsetLines[0] = 0;
}
}
public void Mark()
{
_marks.Push(CurrentOffset);
}
public void ReleaseMark()
{
_marks.Pop();
}
public string Sweep(StringBuilder builder = null)
{
int lastOffset = _marks.Pop();
int earlyOffset = Math.Min(lastOffset, CurrentOffset);
int lateOffset = Math.Max(lastOffset, CurrentOffset) - earlyOffset;
if (builder == null)
return ExtractRange(earlyOffset, lateOffset);
else
ExtractRange(earlyOffset, lateOffset, builder);
return String.Empty;
}
private string GetPastPartInRange(int start, int length, StringBuilder builder = null)
{
if (start < _absoluteIndex)
{
int pastStart = _pastChars.Length - (_absoluteIndex - start);
int remainingPastLength = _pastChars.Length - pastStart;
int desiredPastLength = Math.Min(remainingPastLength, length);
if (pastStart < 0 || pastStart + desiredPastLength > _pastChars.Length)
throw new ArgumentOutOfRangeException("start");
if (builder == null)
return _pastChars.ToString().Substring(pastStart, desiredPastLength);
else
{
for (int i = 0; i < desiredPastLength; i++)
builder.Append(_pastChars[pastStart + i]);
}
}
return String.Empty;
}
private string GetFuturePartInRange(int start, int length, StringBuilder builder = null)
{
int originalOffset = CurrentOffset;
Seek(_absoluteIndex);
if (start + length > _absoluteIndex)
{
int futureStart = Math.Max(_absoluteIndex, start);
int remainingLength = (start + length) - futureStart;
Seek(futureStart);
Step(remainingLength - 1);
if (_currentChar == -1)
throw new ArgumentOutOfRangeException("length");
Step(1);
string futurePart = String.Empty;
if (builder == null)
futurePart = _pastChars.ToString().Substring(_pastChars.Length - remainingLength);
else
{
for (int i = 0; i < remainingLength; i++)
builder.Append(_pastChars[_pastChars.Length - remainingLength + i]);
}
Seek(originalOffset);
return futurePart;
}
Seek(originalOffset);
return String.Empty;
}
public string ExtractRange(int start, int length, StringBuilder builder = null)
{
if (builder == null)
{
string pastPart = GetPastPartInRange(start, length);
string futurePart = GetFuturePartInRange(start, length);
return pastPart + futurePart;
}
else
{
GetPastPartInRange(start, length, builder);
GetFuturePartInRange(start, length, builder);
return String.Empty;
}
}
public void Seek(int offset)
{
Step(offset - CurrentOffset);
}
public void Step(int count = 1)
{
if (count == 0)
return;
Mark();
if (_relativeIndex == 0)
_originChar = _currentChar;
_relativeIndex = _relativeIndex + count;
if (_relativeIndex < 0)
{
if (Math.Abs(_relativeIndex) <= _pastChars.Length)
{
_currentChar = _pastChars[_pastChars.Length + _relativeIndex];
_region = TextTraverserRegion.Within;
}
else
{
_currentChar = -1;
_region = TextTraverserRegion.BeforeFirst;
}
}
else if (_relativeIndex > 0)
{
ReadChars(_relativeIndex);
_relativeIndex = 0;
}
else if (_relativeIndex == 0)
{
_currentChar = _originChar;
if (_currentChar != -1)
_region = TextTraverserRegion.Within;
}
if (!_disableLineTracking)
DoLineTracking();
else
ReleaseMark();
}
private void WindBackToLastLineStart()
{
while (_currentChar != -1 && _currentChar != '\n')
Step(-1);
Step(1);
}
private bool IsMarkAfter()
{
return (_marks.Peek() > CurrentOffset);
}
private void DoForwardLineTracking()
{
StringBuilder sweptText = ClaimBuffer();
sweptText.Clear();
int lowestCurrentPosition = _marks.Peek();
Sweep(sweptText);
int lineCount = 0;
int lastLinePos = 0;
for (int i = 0; i < sweptText.Length; i++)
{
char currChar = sweptText[i];
if ((currChar == '\n'))
{
lineCount++;
lastLinePos = i;
}
}
ReleaseBuffer();
int originalOffset = CurrentOffset;
int originalLine = _currentLine;
_currentLine += lineCount;
if (_currentLine > originalLine)
{
_currentLineOffset = lowestCurrentPosition + lastLinePos;
_lineOffsets[_currentLine] = _currentLineOffset;
_offsetLines[_currentLineOffset] = _currentLine;
}
_currentColumn = originalOffset - 1 - _currentLineOffset;
}
private void DoBackwardLineTracking()
{
int originalOffset = CurrentOffset;
WindBackToLastLineStart();
_currentLine = _offsetLines[CurrentOffset];
_currentColumn = originalOffset - CurrentOffset;
Seek(originalOffset);
}
void DoLineTracking()
{
bool oldMode = _disableLineTracking;
_disableLineTracking = true;
if (!IsMarkAfter())
DoForwardLineTracking();
else
DoBackwardLineTracking();
_disableLineTracking = oldMode;
}
public int CurrentLine
{
get { return _currentLine; }
}
public int CurrentOffset
{
get { return _absoluteIndex + _relativeIndex; }
}
public int CurrentColumn
{
get { return _currentColumn; }
}
public TextTraverserRegion CurrentRegion
{
get { return _region; }
}
public char CurrentChar
{
get {return Convert.ToChar(_currentChar); }
}
private void ReadChars(int count)
{
char[] buffer = new char[count];
int charsRead = _textReader.Read(buffer, 0, count);
_pastChars.Append(buffer);
_currentChar = _textReader.Peek();
if (_currentChar == -1)
_region = TextTraverserRegion.AfterLast;
else
_region = TextTraverserRegion.Within;
_absoluteIndex += charsRead;
}
}
}
Here is the test:
Code:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using Guido.Lexing;
namespace Guido
{
class Program
{
static void Main(string[] args)
{
//StringReader strReader = new StringReader("This is text.\nIs it not?\nEff oo\nOook!");
using (StreamWriter writer = new StreamWriter(File.Create(@"C:\Users\Admin\Desktop\oak.txt")))
{
for (int i = 0; i < 100000; i++)
writer.WriteLine(Guid.NewGuid().ToString());
}
Console.WriteLine("Reading.");
using (StreamReader reader = new StreamReader(File.OpenRead(@"C:\Users\Admin\Desktop\oak.txt")))
{
/*
int i = 0;
while (!reader.EndOfStream)
{
reader.Read();
i++;
}*/
//Console.WriteLine(i);
TextTraverser traverser = new TextTraverser(reader);
while (traverser.CurrentRegion == TextTraverserRegion.Within)
{
traverser.Step();
//if (traverser.CurrentLine % 1000 == 0 && traverser.CurrentColumn == 0)
// Console.WriteLine(traverser.CurrentLine);
}
Console.WriteLine("Total Lines: " + traverser.CurrentLine+1);
traverser.Seek(0);
while (true)
{
int moveBy = 0;
int.TryParse(Console.ReadLine(), out moveBy);
traverser.Step(moveBy);
if (traverser.CurrentRegion == TextTraverserRegion.Within)
Console.WriteLine("Current Char: " + traverser.CurrentChar);
Console.WriteLine("Current Region: " + traverser.CurrentRegion);
Console.WriteLine("Current Offset: " + traverser.CurrentOffset);
Console.WriteLine("Current Line: " + traverser.CurrentLine);
}
}
Console.ReadKey();
}
}
}
-
April 29th, 2012, 09:42 AM
#5
Re: Is this class efficient enough?
 Originally Posted by Guidosoft
Okay, thanks for the response. So, I don't have the full version of Visual Studio on my personal machine. I'm guessing that would have the tools I need for analyzing my code.
Yes, I don't think the Express edition has a profiler. But have you tried Mono? This is a free, open-source C# compiler which is binary compatible with Visual Studio, and it has a profiler also. So you should be able to run your executable through this.
If this doesn't work, then as a last resort you could try the 'run and pause' method. This means you run the program, and pause it randomly. Then note down the call stack. Do this enough times and it will give a reasonable indication of where the code is spending the most time - the more time it spends in each method, the more likely that method is to be on the call stack.
Posting Permissions
- You may not post new threads
- You may not post replies
- You may not post attachments
- You may not edit your posts
-
Forum Rules
|
Click Here to Expand Forum to Full Width
|