Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | RSS feed
#include "bod_text_parser.h"
#include "../common/strutils.h"
#include <memory.h>
//---------------------------------------------------------------------------------
// TOKEN
// Spellings of the fixed (non-text) tokens. getText() indexes this table with the
// token type, for every type that compares lower than t_text.
const char* bod_text_parser::token::specialChars[]={ "{", "}", ";", ":", "/!", "!/" };
// Storage for the static member token::tabWidth. It is not referenced anywhere in
// the visible part of this file.
char bod_text_parser::token::tabWidth;
// Returns the textual form of this token: the fixed spelling taken from
// specialChars for token types below t_text, otherwise the token's own text pointer.
const char* bod_text_parser::token::getText() const
{
  return (type < t_text) ? specialChars[static_cast<int>(type)] : text;
}
//---------------------------------------------------------------------------------
// Remembers the buffer that subsequent nextLine() calls will read from.
// Only the pointer and the length are stored; nothing is parsed and no
// position members are touched here.
void bod_text_parser::preParseBuffer(char *pszBuffer, size_t size)
{
  m_buffLen=size;
  m_pszBuffer=pszBuffer;
}
//---------------------------------------------------------------------------------
// Tokenizes further lines of the registered buffer.
// Lines are fetched with nextLine() and handed to parseLine() together with
// the incremented line counter m_lineIdx. Unless limit is (size_t)-1, the loop
// stops after the first line that brings the number of newly added tokens to
// at least `limit`.
// Returns the number of tokens appended to `tokens` by this call.
size_t bod_text_parser::parseBuffer(size_t limit)
{
  const size_t unlimited=(size_t)-1;
  const size_t countBefore=tokens.size();

  for(char *current=nextLine(); current!=0; current=nextLine()){
    parseLine(current, ++m_lineIdx);
    if(limit==unlimited)
      continue;
    if((tokens.size() - countBefore) >= limit)
      break;
  }
  return tokens.size() - countBefore;
}
//---------------------------------------------------------------------------------
// Splits one zero-terminated line into tokens and appends them to `tokens`.
//
//   line - writable line buffer; delimiters are overwritten with 0 so that text
//          tokens can point straight into it (token::text is not a copy)
//   idx  - line number stored in every token created for this line
//
// Recognized input:
//   ; : { }       single-character tokens
//   space, tab    separators between text tokens
//   /#            header info; the rest of the line (leading spaces skipped) is its text
//   /!  !/        open / close processing-instruction block
//   /             any other '/' ends the line when ignoreRemarks() returns false
//
// Columns are 1-based offsets from the start of `line`.
// Tokens are allocated with new and stored in `tokens`; they are not freed here.
void bod_text_parser::parseLine(char *line, int idx)
{
  char *old=line;  // start of the text run that has not been emitted yet
  char *pos=line;  // current scan position
  char ch;
  token *t;

  while(*pos!=0){
    ch=*pos;
    // single-character delimiters
    if(ch==';' || ch==':' || ch=='{' || ch=='}') {
      *pos=0;
      if(*old!=0){
        t=new token();
        t->type=token::t_text;
        t->line=idx;
        t->text=old;
        t->col=(int)(old - line + 1);
        tokens.push_back(t);
      }
      t=new token();
      if(ch==';')
        t->type=token::t_semicolon;
      else if(ch==':')
        t->type=token::t_colon;
      else if(ch=='{')
        t->type=token::t_openBracket;
      else
        t->type=token::t_closeBracket;
      t->line=idx;
      // column of the delimiter itself, not of the text run preceding it
      t->col=(int)(pos - line + 1);
      tokens.push_back(t);
      old=pos + 1;
    }
    // space or tabulator
    else if(ch==' ' || ch==0x9){
      *pos=0;
      if(*old!=0){
        t=new token();
        t->type=token::t_text;
        t->line=idx;
        t->text=old;
        t->col=(int)(old - line + 1);
        tokens.push_back(t);
      }
      old=pos + 1;
    }
    // either comment, header info or processing instruction
    else if(ch=='/') {
      if(pos[1]=='#'){
        // skip the spaces after "/#"; an empty header info produces no token
        char *info=pos + 2;
        while(*info==' ')
          info++;
        if(*info!=0){
          t=new token();
          t->text=info;
          t->line=idx;
          t->col=(int)(pos - line) + 1;
          t->type=token::t_hdrinfo;
          tokens.push_back(t);
        }
        // no else below on purpose: the header info line then goes through
        // the comment check like any other '/'
      }
      // open proc instruction /!
      if(pos[1]=='!') {
        // terminate and emit the text preceding "/!" (it used to be dropped)
        *pos=0;
        if(*old!=0){
          t=new token();
          t->type=token::t_text;
          t->line=idx;
          t->text=old;
          t->col=(int)(old - line) + 1;
          tokens.push_back(t);
        }
        t=new token();
        t->type=token::t_openInstrBlock;
        t->line=idx;
        t->col=(int)(pos - line) + 1;
        tokens.push_back(t);
        old=pos + 2;
        pos++;  // step over '!' as well (together with the pos++ below)
      }
      // comment
      else if(ignoreRemarks()==false){
        *pos=0;
        break;
      }
    }
    // close proc instruction !/
    else if(ch=='!' && pos[1]=='/'){
      *pos=0;
      if(*old!=0){
        t=new token();
        t->type=token::t_text;
        t->line=idx;
        t->text=old;
        t->col=(int)(old - line) + 1;
        tokens.push_back(t);
      }
      t=new token();
      t->type=token::t_closeInstrBlock;
      t->line=idx;
      t->col=(int)(pos - line) + 1;
      tokens.push_back(t);
      old=pos + 2;
      pos++;  // step over '/' as well (together with the pos++ below)
    }
    pos++;
  }
  // if there are some chars left after the last delimiter (usually ;) this will process them
  if(old!=pos){
    t=new token();
    t->type=token::t_text;
    t->line=idx;
    t->text=old;
    t->col=(int)(old - line) + 1;
    tokens.push_back(t);
  }
}
//---------------------------------------------------------------------------------
/*size_t bod_text_parser::parseLines(char *buffer, size_t size, char ***array_ptr)
{
size_t lineCount=1;
size_t i;
for(i=0; i < size; i++){
if(buffer[i]==0xD){
buffer[i]=0;
lineCount++;
if(((i + 1) < size) && buffer[i+1]==0xA)
buffer[i+1]=1;
}
else if(buffer[i]==0xA){
buffer[i]=0;
lineCount++;
}
}
char **lines=new char*[lineCount];
char *ptr=buffer, *end=buffer + size;
i=0;
do{
if(ptr && *ptr==1)
ptr++;
lines[i]=ptr;
ptr=(char*)memchr(ptr, 0, end - ptr);
if(ptr) ptr++;
// this will protect us from binary files where are more zeroes than we expect
if(++i==lineCount)
break;
}
while(ptr);
*array_ptr=lines;
return lineCount;
}*/
//---------------------------------------------------------------------------------
// Returns the next line of the registered buffer, or NULL once m_lastPos has
// moved past the end of the buffer.
// The line break (CR, LF or CR LF) is overwritten with 0 in place and
// m_lastPos/m_newPos are advanced to the first character after it.
// When no line break is left, the remaining tail starting at m_lastPos is
// returned and the position is set to m_buffLen + 1, so the following call
// yields NULL.
// NOTE(review): the tail is not terminated here - presumably the buffer itself
// ends with a 0 at index m_buffLen; verify against the caller.
char * bod_text_parser::nextLine()
{
  const size_t total=m_buffLen;
  char *data=(char*)m_pszBuffer;

  // whole buffer already handed out
  if(m_lastPos > total)
    return NULL;

  for(size_t cur=m_lastPos; cur < total; ++cur){
    const char c=data[cur];
    if(c!=0xD && c!=0xA)
      continue;

    data[cur]=0;
    m_newPos=cur + 1;
    // CR immediately followed by LF counts as a single line break
    if(c==0xD && (cur + 1) < total && data[cur + 1]==0xA)
      m_newPos++;

    char *found=data + m_lastPos;
    m_lastPos=m_newPos;
    return found;
  }

  // no line break found: hand out whatever is left
  char *tail=data + m_lastPos;
  m_newPos=total + 1;
  m_lastPos=m_newPos;
  return tail;
}
//---------------------------------------------------------------------------------