Subversion Repositories spk

Rev

Rev 1 | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 1 Rev 114
Line 1... Line 1...
1
#include "bod_text_parser.h"
1
#include "bod_text_parser.h"
2
#include "../common/strutils.h"
2
#include "../common/strutils.h"
3
 
3
 
4
#include <memory.h>
4
#include <memory.h>
5
//---------------------------------------------------------------------------------
5
//---------------------------------------------------------------------------------
-
 
6
// ERROR
-
 
7
const char* bod_text_parser::Error::m_messages[]={
-
 
8
	"No error",
-
 
9
	"Newline in constant"
-
 
10
};
-
 
11
 
6
// TOKEN
12
// TOKEN
7
const char* bod_text_parser::token::specialChars[]={ "{", "}", ";", ":", "/!", "!/" };
13
const char* bod_text_parser::token::specialChars[]={ "{", "}", ";", ":", "=", "/!", "!/", "/#", "(", ")", "/", ".", "+" };
8
char bod_text_parser::token::tabWidth;
14
char bod_text_parser::token::tabWidth;
9
 
15
 
10
const char* bod_text_parser::token::getText() const
16
const char* bod_text_parser::token::getText() const
11
{ 
17
{ 
12
	if(type >= t_text)
18
	if(type >= t_text)
Line 14... Line 20...
14
	else
20
	else
15
		return specialChars[(int)type];
21
		return specialChars[(int)type];
16
}
22
}
17
//---------------------------------------------------------------------------------
23
//---------------------------------------------------------------------------------
18
void bod_text_parser::preParseBuffer(char *pszBuffer, size_t size)
24
void bod_text_parser::preParseBuffer(char *pszBuffer, size_t size)
19
{
25
{
20
	//m_lineCount=parseLines(pszBuffer, size, &m_lines);
-
 
21
	m_pszBuffer=pszBuffer;
26
	m_pszBuffer=pszBuffer;
22
	m_buffLen=size;
27
	m_buffLen=size;
23
}	
28
}	
24
//---------------------------------------------------------------------------------
29
//---------------------------------------------------------------------------------
25
size_t bod_text_parser::parseBuffer(size_t limit)
30
size_t bod_text_parser::parseBuffer(size_t limit)
26
{
31
{
-
 
32
	char *ln;
27
	size_t old=tokens.size();
33
	size_t old=tokens.size();
28
	/*for(size_t i=m_newLineIdx; i < m_lineCount; i++){
34
	while(ln=nextLine()){
29
		parseLine(m_lines[i], (int)i + 1);
35
		if(parseLine(ln, ++m_lineIdx)==false)
30
		if(limit!=-1 && ((tokens.size() - old) >= limit)){
-
 
31
			i++;
-
 
32
			break;
36
			break;
33
		}
37
			
34
	}
-
 
35
	m_newLineIdx=i;*/
-
 
36
	
-
 
37
	char *ln;
-
 
38
	while(ln=nextLine()){
-
 
39
		parseLine(ln, ++m_lineIdx);
-
 
40
		if(limit!=-1 && ((tokens.size() - old) >= limit)){
38
		if(limit!=-1 && ((tokens.size() - old) >= limit)){
41
			break;
39
			break;
42
		}
40
		}
43
	}
41
	}
44
	return tokens.size() - old;
42
	return tokens.size() - old;
45
}
43
}
46
//---------------------------------------------------------------------------------
44
//---------------------------------------------------------------------------------
-
 
45
#define PUSH_PREVIOUS_STRING() { t=new token(); \
-
 
46
t->type=token::t_text; \
-
 
47
t->line=idx; \
-
 
48
t->text=old; \
-
 
49
t->col=(int)(old - line + 1); \
-
 
50
tokens.push_back(t); }
-
 
51
 
-
 
52
#define PUSH_TOKEN(_type, _col, _text) { t=new token(); \
-
 
53
t->type=_type; \
-
 
54
t->line=idx; \
-
 
55
t->col=(int)(_col); \
-
 
56
t->text=_text; \
-
 
57
tokens.push_back(t); }
-
 
58
 
47
void bod_text_parser::parseLine(char *line, int idx)
59
bool bod_text_parser::parseLine(char *line, int idx)
48
{
60
{
49
	char *old=line, *pos=line, ch;
61
	char *old=line, *pos=line, ch;
50
	token *t;
62
	token *t;
-
 
63
	bool bInQuotedString=false;
51
	
64
	
52
	while(*pos!=0){
65
	while(*pos!=0){
53
		ch=*pos;
66
		ch=*pos;
-
 
67
		
-
 
68
		if(bInQuotedString || (ch=='"' && testFlag(parseQuotedStrings))){
54
		if(ch==';' || ch==':') {
69
			if(ch=='"'){
-
 
70
				bInQuotedString=!bInQuotedString;
-
 
71
				
-
 
72
				*pos=0;
-
 
73
				if(bInQuotedString){
-
 
74
					if(*old!=0)
-
 
75
						PUSH_PREVIOUS_STRING();
-
 
76
				}	
-
 
77
				// got string
-
 
78
				else
-
 
79
					PUSH_TOKEN(token::t_quotedString, old - line, old);
-
 
80
					
-
 
81
				old=pos + 1;
-
 
82
			}
-
 
83
			else{
-
 
84
				++pos;
-
 
85
				continue;
-
 
86
			}
-
 
87
		}
-
 
88
		
-
 
89
		else if(ch==';' || ch==':' || (ch=='=' && testFlag(parseEqual)) || 
-
 
90
		(testFlag(parseOperators) && (ch=='.' || ch=='+'))) {
55
			*pos=0;
91
			*pos=0;
56
			
92
			
57
			if(*old!=0){
93
			if(*old!=0)
-
 
94
				PUSH_PREVIOUS_STRING();
-
 
95
			
58
				t=new token();
96
			token::Type ty;
-
 
97
			switch(ch){
-
 
98
				case ';':
59
				t->type=token::t_text;
99
					ty=token::t_semicolon;
60
				t->line=idx;
100
					break;
61
				t->text=old;
101
				case ':':
62
				t->col=(int)(old - line + 1);
102
					ty=token::t_colon;
-
 
103
					break;
-
 
104
				case '=':
63
				tokens.push_back(t);
105
					ty=token::t_equal;
64
			}	
106
					break;
65
			t=new token();
107
				case '.':
66
			t->type=(ch==';' ? token::t_semicolon : token::t_colon);
108
					ty=token::t_dot;
67
			t->line=idx;
109
					break;
68
			t->col=(int)(pos - line + 1);
110
				case '+':
69
			tokens.push_back(t);
111
					ty=token::t_plus;
-
 
112
					break;
-
 
113
			}
-
 
114
			PUSH_TOKEN(ty, pos - line + 1, 0);
70
			
115
			
71
			old=pos + 1;
116
			old=pos + 1;
72
		}
117
		}
73
		// space or tabulator
118
		// space or tabulator
74
		else if(ch==' ' || ch==0x9){
119
		else if(ch==' ' || ch==0x9){
75
			*pos=0;
120
			*pos=0;
76
			if(*old!=0){
121
			if(*old!=0)
77
				t=new token();
-
 
78
				t->type=token::t_text;
-
 
79
				t->line=idx;
-
 
80
				t->text=old;
-
 
81
				t->col=(int)(old - line + 1);
-
 
82
				tokens.push_back(t);
122
				PUSH_PREVIOUS_STRING();
83
			}
123
			
84
			old=pos + 1;
124
			old=pos + 1;
85
		}
125
		}
-
 
126
		// brackets
86
		else if(ch=='{' || ch=='}') {
127
		else if(ch=='{' || ch=='}' || (testFlag(parseStdBrackets) && (ch=='(' || ch==')'))) {
87
			*pos=0;
128
			*pos=0;
88
			if(*old!=0){
129
			if(*old!=0)
-
 
130
				PUSH_PREVIOUS_STRING();
-
 
131
			
89
				t=new token();
132
			token::Type ty;
-
 
133
			switch(ch){
-
 
134
				case '{':
-
 
135
					ty=token::t_openCrBracket;
90
				t->text=old;
136
					break;
91
				t->line=idx;
137
				case '}':
92
				t->col=(int)(old - line) + 1;
138
					ty=token::t_closeCrBracket;
-
 
139
					break;
-
 
140
				case '(':
93
				t->type=token::t_text;
141
					ty=token::t_openStdBracket;
-
 
142
					break;
-
 
143
				case ')':
94
				tokens.push_back(t);
144
					ty=token::t_closeStdBracket;
-
 
145
					break;
95
			}
146
			}
96
			t=new token();
147
			PUSH_TOKEN(ty, pos - line + 1, 0);
97
			t->type=(ch=='{' ? token::t_openBracket : token::t_closeBracket);
-
 
98
			t->line=idx;
148
			
99
			t->col=(int)(old - line + 1);
-
 
100
			old=pos + 1;
149
			old=pos + 1;
101
			
-
 
102
			tokens.push_back(t);
-
 
103
		}
150
		}
104
		// either comment, header info or processing instruction
151
		// either comment, header info or processing instruction
105
		else if(ch=='/') {
152
		else if(ch=='/') {
106
			if(pos[1]=='#'){
153
			if(pos[1]=='#'){
107
				t=new token();
154
				t=new token();
Line 114... Line 161...
114
					t->line=idx;
161
					t->line=idx;
115
					t->col=(int)(pos - line) + 1;
162
					t->col=(int)(pos - line) + 1;
116
					t->type=token::t_hdrinfo;
163
					t->type=token::t_hdrinfo;
117
					tokens.push_back(t);
164
					tokens.push_back(t);
118
				}
165
				}
119
			}
166
			}
120
			// open proc instruction /!
167
			// open proc instruction /!
121
			if(pos[1]=='!') {
168
			if(pos[1]=='!') {
122
				
-
 
123
				t=new token();
-
 
124
				t->type=token::t_openInstrBlock;
169
				PUSH_TOKEN(token::t_openInstrBlock, pos - line + 1, 0);
125
				t->line=idx;
-
 
126
				t->col=(int)(pos - line) + 1;
-
 
127
				tokens.push_back(t);
-
 
128
				old=pos + 2;
170
				old=pos + 2;
129
				pos++;
171
				pos++;
130
			}
172
			}
131
			// comment
173
			// slash
-
 
174
			else{
-
 
175
				/* if ignore slash is on then do nothing
-
 
176
				  else treat it as comment unless C comments are specified, in which case there must
-
 
177
				  be // to make comment, otherwise / is treated as token
-
 
178
				*/
132
			else if(ignoreRemarks()==false){
179
				if(testFlag(parseIgnoreSlash)==false){
-
 
180
					if(testFlag(parseBODComments)) {
-
 
181
						*pos=0;
-
 
182
						pos--;
-
 
183
					}
-
 
184
					else if(testFlag(parseCComments) && pos[1]=='/') {
133
				*pos=0;
185
						*pos=0;
134
				break;
186
						pos--;
-
 
187
					}
-
 
188
					else {
-
 
189
						*pos=0;
-
 
190
						if(*old!=0)
-
 
191
							PUSH_PREVIOUS_STRING();
-
 
192
						
-
 
193
						PUSH_TOKEN(token::t_slash, pos - line + 1, 0);
-
 
194
						old=pos + 1;
-
 
195
					}
-
 
196
				}
135
			}
197
			}
136
		}
198
		}
137
		// close proc instruction !/
199
		// close proc instruction !/
138
		else if(ch=='!' && pos[1]=='/'){
200
		else if(ch=='!' && pos[1]=='/'){
139
			*pos=0;
201
			*pos=0;
140
			if(*old!=0){
202
			if(*old!=0)
141
				t=new token();
-
 
142
				t->text=old;
-
 
143
				t->line=idx;
-
 
144
				t->col=(int)(old - line) + 1;
-
 
145
				t->type=token::t_text;
-
 
146
				tokens.push_back(t);
203
				PUSH_PREVIOUS_STRING();
147
			}
204
			
148
			t=new token();
-
 
149
			t->type=token::t_closeInstrBlock;
205
			PUSH_TOKEN(token::t_closeInstrBlock, pos - line + 1, 0);
150
			t->line=idx;
-
 
151
			t->col=(int)(pos - line) + 1;
-
 
152
			tokens.push_back(t);
-
 
153
			old=pos + 2;
206
			old=pos + 2;
154
			pos++;
207
			pos++;
155
		}
208
		}
-
 
209
		
156
		pos++;
210
		pos++;
-
 
211
	}
-
 
212
	
-
 
213
	if(bInQuotedString){
-
 
214
		error(idx, (int)strlen(line), Error::errNewLineInConstant);
-
 
215
		return false;
157
	}
216
	}
158
	// if there are some chars left after last delimeter (usually ;) this will process them
217
	// if there are some chars left after last delimeter (usually ;) this will process them
159
	if(old!=pos){
218
	if(old!=pos){
160
		t=new token();
-
 
161
		t->text=old;
-
 
162
		t->line=idx;
-
 
163
		t->col=(int)(old - line) + 1;
219
		PUSH_TOKEN(token::t_text, old - line + 1, old);
164
		t->type=token::t_text;
-
 
165
		tokens.push_back(t);
-
 
166
	}
220
	}
-
 
221
	return true;
167
}
222
}
168
//---------------------------------------------------------------------------------
-
 
169
/*size_t bod_text_parser::parseLines(char *buffer, size_t size, char ***array_ptr)
-
 
170
{
-
 
171
	size_t lineCount=1;
-
 
172
	size_t i;
-
 
173
	for(i=0; i < size; i++){
-
 
174
		if(buffer[i]==0xD){
-
 
175
			buffer[i]=0;
-
 
176
			lineCount++;
-
 
177
			if(((i + 1) < size) && buffer[i+1]==0xA)
-
 
178
				buffer[i+1]=1;
-
 
179
		}
-
 
180
		else if(buffer[i]==0xA){
-
 
181
			buffer[i]=0;
-
 
182
			lineCount++;
-
 
183
		}
-
 
184
	}
-
 
185
	
-
 
186
	char **lines=new char*[lineCount];
-
 
187
	
-
 
188
	char *ptr=buffer, *end=buffer + size;
-
 
189
	
-
 
190
	i=0;
-
 
191
	do{
-
 
192
		if(ptr && *ptr==1) 
-
 
193
			ptr++;
-
 
194
			
-
 
195
		lines[i]=ptr;
-
 
196
		ptr=(char*)memchr(ptr, 0, end - ptr);
-
 
197
		if(ptr) ptr++;
-
 
198
		// this will protect us from binary files where are more zeroes than we expect
-
 
199
		if(++i==lineCount) 
-
 
200
			break; 
-
 
201
	}
-
 
202
	while(ptr);
-
 
203
	
-
 
204
	*array_ptr=lines;
-
 
205
	return lineCount;
-
 
206
}*/
-
 
207
//---------------------------------------------------------------------------------
223
//---------------------------------------------------------------------------------
208
char * bod_text_parser::nextLine()
224
char * bod_text_parser::nextLine()
209
{
225
{
210
/*
-
 
211
	size_t i, size=m_buffLen;
-
 
212
	char *line=0, *buffer=(char*)m_pszBuffer;
-
 
213
	
-
 
214
	for(i=m_lastCharPos; i < size; i++){
-
 
215
		if(buffer[i]==0xD){
-
 
216
			buffer[i]=0;
-
 
217
			if(((i + 1) < size) && buffer[i+1]==0xA)
-
 
218
				buffer[i+1]=1;
-
 
219
			
-
 
220
			line=buffer + m_lastCharPos;
-
 
221
			break;
-
 
222
		}
-
 
223
		else if(buffer[i]==0xA){
-
 
224
			buffer[i]=0;
-
 
225
			line=buffer + m_lastCharPos;
-
 
226
			break;
-
 
227
		}
-
 
228
	}
-
 
229
	if(line==0) line=buffer + m_lastCharPos;
-
 
230
	if(*line==0) line++;
-
 
231
	
-
 
232
	if(m_lastCharPos==size) return NULL;
-
 
233
	
-
 
234
	m_lastCharPos=i;
-
 
235
	
-
 
236
	if(line[0]==1)
-
 
237
		line++;
-
 
238
	return line;
-
 
239
	*/
-
 
240
	
-
 
241
	size_t i, size=m_buffLen;
226
	size_t i, size=m_buffLen;
242
	char *line=0, *buffer=(char*)m_pszBuffer;
227
	char *line=0, *buffer=(char*)m_pszBuffer;
243
	
228
	
244
	for(i=m_lastPos; i < size; i++){
229
	for(i=m_lastPos; i < size; i++){
245
		if(buffer[i]==0xD){
230
		if(buffer[i]==0xD){