The lexical analyzer splits files into tokens. It exports the COM interface ITLex:
// ITLex: dispatch interface of the lexical analyzer (tokenizer).
// All members return HRESULT; results are delivered through [out, retval]
// parameters. Semantics noted as "presumably" are inferred from member names
// only and should be confirmed against the implementation.
interface ITLex : IDispatch
{
    // Takes a NUL-terminated file name; presumably selects the file to tokenize.
    [id(1), helpstring("method AttachFile")]
    HRESULT AttachFile([in, string] char *FileName);

    // Yields a ULONG; presumably the ID of the next token, consuming it.
    [propget, id(2), helpstring("property PopToken")]
    HRESULT PopToken([out, retval] ULONG *pVal);

    // Yields a DOUBLE; presumably the numeric value of the current token.
    [propget, id(3), helpstring("property GetNumericToken")]
    HRESULT GetNumericToken([out, retval] DOUBLE *pVal);

    // Yields a string through a char** out-parameter declared with size_is(255).
    // NOTE(review): char* is not an automation type (BSTR is the usual choice
    // for IDispatch-based interfaces) - confirm how this is marshaled and who
    // owns the returned buffer.
    [propget, id(4), helpstring("property GetCurrentSymbol")]
    HRESULT GetCurrentSymbol([out, retval, string, size_is(255)] char **pVal);

    // Yields a ULONG; presumably the ID of the next token without consuming it.
    [propget, id(5), helpstring("property PeekToken")]
    HRESULT PeekToken([out, retval] ULONG *pVal);

    // Takes no arguments; presumably resets reading to the start of the input.
    [id(6), helpstring("method Rewind")]
    HRESULT Rewind();

    // Read/write ULONG property; getter and setter share DISPID 7.
    [propget, id(7), helpstring("property CurrentPosition")]
    HRESULT CurrentPosition([out, retval] ULONG *pVal);
    // NOTE(review): propputref is intended for assigning object references;
    // a plain ULONG value would conventionally use propput. Left as-is because
    // switching changes the generated accessor name (putref_ -> put_) that the
    // implementing class must match - confirm before changing.
    [propputref, id(7), helpstring("property CurrentPosition")]
    HRESULT CurrentPosition([in] ULONG newVal);

    // Yields a single CHAR; presumably the next character of the input.
    [propget, id(8), helpstring("property GetChar")]
    HRESULT GetChar([out, retval] CHAR *pVal);

    // Takes a NUL-terminated print name and a ULONG ID; presumably registers
    // a token spelling under that lexical ID.
    [id(9), helpstring("method AddToken")]
    HRESULT AddToken([in, string] char *PrintName, [in] ULONG LexID);

    // Takes a ULONG flag; presumably non-zero makes ';' start a comment.
    [id(10), helpstring("method TreatSemicolonAsComment")]
    HRESULT TreatSemicolonAsComment([in] ULONG bFlag);

    // Takes a NUL-terminated string; presumably tokenizes it instead of a file.
    [id(11), helpstring("method AttachString")]
    HRESULT AttachString([in, string] CHAR *szCode);
};