Wolframe, 0.0.3

string.hpp
Go to the documentation of this file.
1 /************************************************************************
2 Copyright (C) 2011 - 2014 Project Wolframe.
3 All rights reserved.
4 
5 This file is part of Project Wolframe.
6 
7 Commercial Usage
8 Licensees holding valid Project Wolframe Commercial licenses may
9 use this file in accordance with the Project Wolframe
10 Commercial License Agreement provided with the Software or,
11 alternatively, in accordance with the terms contained
12 in a written agreement between the licensee and Project Wolframe.
13 
14 GNU General Public License Usage
15 Alternatively, you can redistribute this file and/or modify it
16 under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19 
20 Wolframe is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24 
25 You should have received a copy of the GNU General Public License
26 along with Wolframe. If not, see <http://www.gnu.org/licenses/>.
27 
28 If you have questions regarding the use of this file, please contact
29 Project Wolframe.
30 
31 ************************************************************************/
38 #ifndef _Wolframe_TYPES_STRING_HPP_INCLUDED
39 #define _Wolframe_TYPES_STRING_HPP_INCLUDED
40 #include <cstdlib>
41 #include <string>
42 #include <stdexcept>
43 #include <boost/shared_ptr.hpp>
44 namespace _Wolframe {
45 namespace types {
46 
49 class String
50 {
51 public:
54  enum Encoding
55  {
57  UTF8,
64  };
65 
67  String();
73  String( const void* content, std::size_t contentsize, Encoding encoding_=UTF8, unsigned char codepage_=0);
76  String( const std::string& val);
79  String( const std::wstring& val);
82  String( const String& o);
84  ~String();
85 
88  String translateEncoding( Encoding encoding_, unsigned char codepage_=0) const;
89 
92  static std::size_t elementSize( Encoding e)
93  {
94  static std::size_t ar[] = {1,1,2,2,2,2,4,4};
95  return ar[e];
96  }
99  std::size_t elementSize() const
100  {
101  return elementSize( (Encoding)m_encoding);
102  }
103 
108  static const char* encodingName( Encoding e, unsigned char c=0)
109  {
110  static const char* ar[] = {"ISO8859","UTF-8","UTF-16BE","UTF-16LE","UCS-2BE","UCS-2LE","UCS-4BE","UCS-4LE"};
111  static const char* cp[] = {"ISO8859-1","ISO8859-2","ISO8859-3","ISO8859-4","ISO8859-5","ISO8859-6","ISO8859-7","ISO8859-8","ISO8859-9"};
112  if (e == ISO8859 && c)
113  {
114  if (c > 9) return 0;
115  return cp[ c];
116  }
117  else
118  {
119  return ar[e];
120  }
121  }
124  const char* encodingName() const {return encodingName((Encoding)m_encoding, m_codepage);}
125 
129  {
131  unsigned char codepage;
132 
133  EncodingAttrib( Encoding encoding_=UTF8, unsigned char codepage_=0)
134  :encoding(encoding_),codepage(codepage_){}
137 
140  const char* encodingName() const
142  };
145  static EncodingAttrib getEncodingFromName( const std::string& name);
146 
150  {
154  };
159  static EncodingClass::Id guessEncoding( const char* content, std::size_t size);
163  static EncodingClass::Id guessEncoding( const std::string& content);
164 
167  Encoding encoding() const {return (Encoding)m_encoding;}
168  unsigned char codepage() const {return m_codepage;}
171 
174  std::string tostring() const;
177  std::wstring towstring() const;
178 
182  std::size_t size() const {return m_size;}
183 
186  const wchar_t* w_str() const {if ((Encoding)m_encoding != UTF16BE) throw std::logic_error("cannot call w_str() for non UTF-16BE encoding"); return (const wchar_t*)(const void*)m_ar;}
189  const char* c_str() const {if ((Encoding)m_encoding != UTF8) throw std::logic_error("cannot call c_str() for non UTF-8 encoding"); return (const char*)m_ar;}
192  const void* ptr() const {return m_ar;}
193 
194 private:
195  friend class StringConst;
202  String( const ConstQualifier&, const void* content, std::size_t contentsize, Encoding encoding_=UTF8, unsigned char codepage_=0);
203 
204 private:
205  unsigned char m_encoding;
206  unsigned char m_codepage;
207  bool m_isconst;
208  std::size_t m_size;
209  unsigned char* m_ar;
210 };
211 
216  :public String
217 {
218 public:
226  StringConst( const void* content, std::size_t contentsize, Encoding encoding_=UTF8, unsigned char codepage_=0)
227  :String(ConstC,content,contentsize,encoding_,codepage_){}
230  StringConst( const std::string& val)
231  :String(ConstC,val.c_str(),val.size(),UTF8){}
234  StringConst( const std::wstring& val)
235  :String(ConstC,val.c_str(),val.size()*2,UTF16BE){}
238  StringConst( const String& o)
240 };
241 
242 }}//namespace
243 #endif
244 
UCS-4BE (only available with WITH_TEXTWOLF=1)
Definition: string.hpp:62
Definition: string.hpp:196
UTF-16LE (only available with WITH_TEXTWOLF=1)
Definition: string.hpp:59
const char * c_str() const
Get the base pointer of the string in case of a string in encoding UTF-8.
Definition: string.hpp:189
std::size_t m_size
number of elements in string
Definition: string.hpp:208
bool m_isconst
true, if the string content is constant, i.e. not allocated (not owned) by this object
Definition: string.hpp:207
std::wstring towstring() const
Get the content of the string as std::wstring in UTF-16BE (wchar_t)
Structure namespace for the enumeration of supported encoding classes.
Definition: string.hpp:149
ConstQualifier
Definition: string.hpp:196
StringConst()
Constructor.
Definition: string.hpp:220
static EncodingClass::Id guessEncoding(const char *content, std::size_t size)
Guess the encoding from content.
static const char * encodingName(Encoding e, unsigned char c=0)
Map encoding to string.
Definition: string.hpp:108
String translateEncoding(Encoding encoding_, unsigned char codepage_=0) const
Translate this string into the encoding specified.
StringConst(const std::wstring &val)
Constructor from a reference to a std::wstring in UTF-16BE (wchar_t)
Definition: string.hpp:234
std::size_t elementSize() const
Get the size of an element (not equal to char size for variable-width encodings) of this string...
Definition: string.hpp:99
Constant string that is not owned by the structure (caller has ownership)
Definition: string.hpp:215
const char * encodingName() const
Get the encoding of this string as string.
Definition: string.hpp:124
const void * ptr() const
Get the base pointer of the string in any encoding.
Definition: string.hpp:192
String()
Default constructor.
EncodingAttrib(Encoding encoding_=UTF8, unsigned char codepage_=0)
Definition: string.hpp:133
unsigned char codepage() const
Get the code page of this string in case of an ISO8859 encoding.
Definition: string.hpp:170
const wchar_t * w_str() const
Get the base pointer of the string in case of a string in encoding UTF-16BE (wchar_t) ...
Definition: string.hpp:186
std::size_t size() const
Get the size of the string in elements.
Definition: string.hpp:182
unsigned char m_codepage
codepage for ISO8859 encodings
Definition: string.hpp:206
UTF-8 (full range only with WITH_TEXTWOLF=1, range 0..127 otherwise)
Definition: string.hpp:57
Latin (full range only with WITH_TEXTWOLF=1, range 0..255 (code page 1) and range 0...
Definition: string.hpp:56
const char * encodingName() const
Get this encoding as string.
Definition: string.hpp:140
Encoding encoding() const
Get the encoding of this string.
Definition: string.hpp:167
Encoding
Encoding of a database string.
Definition: string.hpp:54
UCS-4LE (only available with WITH_TEXTWOLF=1)
Definition: string.hpp:63
unsigned char * m_ar
array of elements
Definition: string.hpp:209
StringConst(const String &o)
Copy constructor.
Definition: string.hpp:238
static EncodingAttrib getEncodingFromName(const std::string &name)
Parse the attributes of an encoding from an encoding name.
UCS-2BE (only available with WITH_TEXTWOLF=1)
Definition: string.hpp:60
StringConst(const std::string &val)
Constructor from a reference to a std::string in UTF-8.
Definition: string.hpp:230
std::string tostring() const
Get the content of the string as std::string in UTF-8.
UCS-2LE (only available with WITH_TEXTWOLF=1)
Definition: string.hpp:61
Attributes describing an encoding completely.
Definition: string.hpp:128
Encoding encoding
Definition: string.hpp:130
unsigned char codepage
Definition: string.hpp:131
Type for representing strings in various encodings.
Definition: string.hpp:49
UTF-16BE (full range only with WITH_TEXTWOLF=1, range 0..255 otherwise)
Definition: string.hpp:58
unsigned char m_encoding
encoding (type Encoding)
Definition: string.hpp:205
StringConst(const void *content, std::size_t contentsize, Encoding encoding_=UTF8, unsigned char codepage_=0)
Constructor.
Definition: string.hpp:226
static std::size_t elementSize(Encoding e)
Map encoding to sizeof element in string (not equal to char size for variable-width encodings) ...
Definition: string.hpp:92
EncodingAttrib(const EncodingAttrib &o)
Definition: string.hpp:135
Id
Encoding class for guessing encoding.
Definition: string.hpp:153