| 1 | cycrow | 1 | /*
 | 
        
           |  |  | 2 |   defines token_stream - class that acts as high level wrapper around 
 | 
        
           |  |  | 3 |   bod_text_parser. It mimics behaviour of stream.
 | 
        
           |  |  | 4 |   | 
        
           |  |  | 5 |   There is a potential hazard when loading strings because only a finite number of 
 | 
        
           |  |  | 6 |   tokens are loaded into memory. If the number of tokens forming the string is greater 
 | 
        
           |  |  | 7 |   than the token unload threshold, seeking back to the beginning of the string will fail.
 | 
        
           |  |  | 8 |   | 
        
           |  |  | 9 |   see bod_bob_parser::loadString
 | 
        
           |  |  | 10 |   | 
        
           |  |  | 11 | */
 | 
        
           |  |  | 12 |   | 
        
           |  |  | 13 | #ifndef TOKEN_STREAM_INCLUDED
 | 
        
           |  |  | 14 | #define TOKEN_STREAM_INCLUDED
 | 
        
           |  |  | 15 |   | 
        
           |  |  | 16 | #include "bob_dom_stream.h"
 | 
        
           |  |  | 17 | #include "bod_text_parser.h"
 | 
        
           |  |  | 18 |   | 
        
           |  |  | 19 | #include "../common/ext_list.h"
 | 
        
           |  |  | 20 |   | 
        
           |  |  | 21 | class token_stream : public ext::stream_base
 | 
        
           |  |  | 22 | {
 | 
        
           |  |  | 23 | 	public:
 | 
        
           |  |  | 24 | 		typedef bod_text_parser::token token;
 | 
        
           |  |  | 25 | 		typedef bod_text_parser::TokenList::size_type size_type;
 | 
        
           |  |  | 26 | 		typedef int offset_type;
 | 
        
           |  |  | 27 |   | 
        
           |  |  | 28 | 	private:
 | 
        
           |  |  | 29 | 		typedef bod_text_parser::TokenList::iterator iterator;
 | 
        
           |  |  | 30 |   | 
        
           |  |  | 31 | 		bod_text_parser p;
 | 
        
           |  |  | 32 | 		iterator m_end;
 | 
        
           |  |  | 33 | 		iterator m_pos;
 | 
        
           |  |  | 34 | 		size_type m_left;
 | 
        
           |  |  | 35 | 		size_type m_right;
 | 
        
           |  |  | 36 | 		bool m_bIgnoreRemarks;
 | 
        
           |  |  | 37 |   | 
        
           |  |  | 38 | 		void advance(iterator &it, offset_type off)
 | 
        
           |  |  | 39 | 		{
 | 
        
           |  |  | 40 | 			if(off > 0){
 | 
        
           |  |  | 41 | 				for(offset_type i=0; i < off; i++)
 | 
        
           |  |  | 42 | 					++it;
 | 
        
           |  |  | 43 | 			}
 | 
        
           |  |  | 44 | 			else{
 | 
        
           |  |  | 45 | 				for(offset_type i=off; i < 0; i++)
 | 
        
           |  |  | 46 | 					--it;
 | 
        
           |  |  | 47 | 			}
 | 
        
           |  |  | 48 | 		}
 | 
        
           |  |  | 49 |   | 
        
           |  |  | 50 | 		void parse(size_t limit)
 | 
        
           |  |  | 51 | 		{
 | 
        
           |  |  | 52 | 			m_right+=p.parseBuffer(limit);
 | 
        
           |  |  | 53 | 		}
 | 
        
           |  |  | 54 |   | 
        
           |  |  | 55 | 		void unload(size_t count)
 | 
        
           |  |  | 56 | 		{
 | 
        
           |  |  | 57 | 			if(count > m_left) count=m_left;
 | 
        
           |  |  | 58 | 			m_left-=count;
 | 
        
           |  |  | 59 | 			while(count--){
 | 
        
           |  |  | 60 | 				delete *(p.tokens.begin());
 | 
        
           |  |  | 61 | 				p.tokens.erase(p.tokens.begin());
 | 
        
           |  |  | 62 | 			}
 | 
        
           |  |  | 63 | 		}
 | 
        
           |  |  | 64 |   | 
        
           |  |  | 65 | 	public:
 | 
        
           |  |  | 66 | 		static const int tokenTreshold = 200; // how much tokens to unload
 | 
        
           |  |  | 67 | 		static const int tokenUnloadTreshold = 2 * 200; // when to start unloading tokens
 | 
        
           |  |  | 68 |   | 
        
           |  |  | 69 | 		token_stream() { m_bIgnoreRemarks=false; }
 | 
        
           |  |  | 70 |   | 
        
           |  |  | 71 | 		void rdbuffer(char *pszBuffer, size_t size) 
 | 
        
           |  |  | 72 | 		{ 
 | 
        
           |  |  | 73 | 			p.ignoreRemarks(ignoreRemarks());
 | 
        
           |  |  | 74 | 			p.preParseBuffer(pszBuffer, size); 
 | 
        
           |  |  | 75 | 			m_left=0;
 | 
        
           |  |  | 76 | 			m_right=p.parseBuffer(1); 
 | 
        
           |  |  | 77 | 			m_pos=p.tokens.begin();
 | 
        
           |  |  | 78 | 			m_end=p.tokens.end();
 | 
        
           |  |  | 79 | 			clear(p.tokens.size() ? goodbit : badbit); 
 | 
        
           |  |  | 80 | 		}
 | 
        
           |  |  | 81 |   | 
        
           |  |  | 82 | 		token* tok() { return good() ? *m_pos : 0; }
 | 
        
           |  |  | 83 |   | 
        
           |  |  | 84 | 		size_type tell() const { return m_left; }
 | 
        
           |  |  | 85 | 		size_type avail() const { return m_right; }
 | 
        
           |  |  | 86 | 		size_type size() const { return p.tokens.size(); }
 | 
        
           |  |  | 87 |   | 
        
           |  |  | 88 | 		token_stream& operator++()	{ advance(); return *this; }
 | 
        
           |  |  | 89 | 		token_stream& operator--()	{ advance(-1); return *this; }
 | 
        
           |  |  | 90 |   | 
        
           |  |  | 91 | 		bool ignoreRemarks() const { return m_bIgnoreRemarks; } 
 | 
        
           |  |  | 92 | 		void ignoreRemarks(bool ignore) { m_bIgnoreRemarks=ignore; }
 | 
        
           |  |  | 93 |   | 
        
           |  |  | 94 | 		bool advance(offset_type offset=1)
 | 
        
           |  |  | 95 | 		{
 | 
        
           |  |  | 96 | 			if(offset > (offset_type)m_right)
 | 
        
           |  |  | 97 | 				parse(offset < tokenTreshold ? tokenTreshold : offset);
 | 
        
           |  |  | 98 |   | 
        
           |  |  | 99 | 			if(offset > (offset_type)m_right || (offset_type)m_left + offset < 0)
 | 
        
           |  |  | 100 | 				setstate(failbit);
 | 
        
           |  |  | 101 | 			else{
 | 
        
           |  |  | 102 | 				m_left+=offset; m_right-=offset;
 | 
        
           |  |  | 103 | 				if(m_right==0) 
 | 
        
           |  |  | 104 | 					parse(tokenTreshold);
 | 
        
           |  |  | 105 |   | 
        
           |  |  | 106 | 				advance(m_pos, offset);
 | 
        
           |  |  | 107 |   | 
        
           |  |  | 108 | 				if(m_pos==m_end) 
 | 
        
           |  |  | 109 | 					setstate(eofbit);
 | 
        
           |  |  | 110 | 				else
 | 
        
           |  |  | 111 | 					clear((state)(rdstate() & ~eofbit));
 | 
        
           |  |  | 112 | 			}
 | 
        
           |  |  | 113 | 			if(m_left > tokenUnloadTreshold)
 | 
        
           |  |  | 114 | 				unload(tokenTreshold);
 | 
        
           |  |  | 115 | 			return good();
 | 
        
           |  |  | 116 | 		}
 | 
        
           |  |  | 117 |   | 
        
           |  |  | 118 | 		token* previous()
 | 
        
           |  |  | 119 | 		{
 | 
        
           |  |  | 120 | 			if(m_left==0)
 | 
        
           |  |  | 121 | 				return 0;
 | 
        
           |  |  | 122 | 			else
 | 
        
           |  |  | 123 | 				return *(m_pos-1);
 | 
        
           |  |  | 124 | 		}
 | 
        
           |  |  | 125 | };
 | 
        
           |  |  | 126 |   | 
        
           |  |  | 127 | #endif // !defined(TOKEN_STREAM_INCLUDED)
 |