libstdc++
codecvt.h
Go to the documentation of this file.
1 // Locale support (codecvt) -*- C++ -*-
2 
3 // Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
4 // 2009 Free Software Foundation, Inc.
5 //
6 // This file is part of the GNU ISO C++ Library. This library is free
7 // software; you can redistribute it and/or modify it under the
8 // terms of the GNU General Public License as published by the
9 // Free Software Foundation; either version 3, or (at your option)
10 // any later version.
11 
12 // This library is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
16 
17 // Under Section 7 of GPL version 3, you are granted additional
18 // permissions described in the GCC Runtime Library Exception, version
19 // 3.1, as published by the Free Software Foundation.
20 
21 // You should have received a copy of the GNU General Public License and
22 // a copy of the GCC Runtime Library Exception along with this program;
23 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 // <http://www.gnu.org/licenses/>.
25 
26 /** @file bits/codecvt.h
27  * This is an internal header file, included by other library headers.
28  * You should not attempt to use it directly.
29  */
30 
31 //
32 // ISO C++ 14882: 22.2.1.5 Template class codecvt
33 //
34 
35 // Written by Benjamin Kosnik <bkoz@redhat.com>
36 
37 #ifndef _CODECVT_H
38 #define _CODECVT_H 1
39 
40 #pragma GCC system_header
41 
42 _GLIBCXX_BEGIN_NAMESPACE(std)
43 
44  /// Empty base class for codecvt facet [22.2.1.5].
45  class codecvt_base
46  {
47  public:
48  enum result // Outcome reported by the codecvt conversion functions.
49  {
50  ok, // All of the input was converted.
51  partial, // Input ended early or the output had insufficient space.
52  error, // The conversion failed.
53  noconv // No conversion was necessary.
54  };
55  };
56 
57  /**
58  * @brief Common base for codecvt functions.
59  *
60  * This template class provides implementations of the public functions
61  * that forward to the protected virtual functions.
62  *
63  * This template also provides abstract stubs for the protected virtual
64  * functions.
65  */
66  template<typename _InternT, typename _ExternT, typename _StateT>
68  : public locale::facet, public codecvt_base
69  {
70  public:
71  // Types:
72  typedef codecvt_base::result result;
73  typedef _InternT intern_type;
74  typedef _ExternT extern_type;
75  typedef _StateT state_type;
76 
77  // 22.2.1.5.1 codecvt members
78  /**
79  * @brief Convert from internal to external character set.
80  *
81  * Converts input string of intern_type to output string of
82  * extern_type. This is analogous to wcsrtombs. It does this by
83  * calling codecvt::do_out.
84  *
85  * The source and destination character sets are determined by the
86  * facet's locale, internal and external types.
87  *
88  * The characters in [from,from_end) are converted and written to
89  * [to,to_end). from_next and to_next are set to point to the
90  * character following the last successfully converted character,
91  * respectively. If the result needed no conversion, from_next and
92  * to_next are not affected.
93  *
94  * The @a state argument should be initialized if the input is at the
95  * beginning and carried from a previous call if continuing
96  * conversion. There are no guarantees about how @a state is used.
97  *
98  * The result returned is a member of codecvt_base::result. If
99  * all the input is converted, returns codecvt_base::ok. If no
100  * conversion is necessary, returns codecvt_base::noconv. If
101  * the input ends early or there is insufficient space in the
102  * output, returns codecvt_base::partial. Otherwise the
103  * conversion failed and codecvt_base::error is returned.
104  *
105  * @param state Persistent conversion state data.
106  * @param from Start of input.
107  * @param from_end End of input.
108  * @param from_next Returns start of unconverted data.
109  * @param to Start of output buffer.
110  * @param to_end End of output buffer.
111  * @param to_next Returns start of unused output area.
112  * @return codecvt_base::result.
113  */
114  result
115  out(state_type& __state, const intern_type* __from,
116  const intern_type* __from_end, const intern_type*& __from_next,
117  extern_type* __to, extern_type* __to_end,
118  extern_type*& __to_next) const
119  {
120  return this->do_out(__state, __from, __from_end, __from_next,
121  __to, __to_end, __to_next);
122  }
123 
124  /**
125  * @brief Reset conversion state.
126  *
127  * Writes characters to output that would restore @a state to initial
128  * conditions. The idea is that if a partial conversion occurs, then
129  * the converting the characters written by this function would leave
130  * the state in initial conditions, rather than partial conversion
131  * state. It does this by calling codecvt::do_unshift().
132  *
133  * For example, if 4 external characters always converted to 1 internal
134  * character, and input to in() had 6 external characters with state
135  * saved, this function would write two characters to the output and
136  * set the state to initialized conditions.
137  *
138  * The source and destination character sets are determined by the
139  * facet's locale, internal and external types.
140  *
141  * The result returned is a member of codecvt_base::result. If the
142  * state could be reset and data written, returns codecvt_base::ok. If
143  * no conversion is necessary, returns codecvt_base::noconv. If the
144  * output has insufficient space, returns codecvt_base::partial.
145  * Otherwise the reset failed and codecvt_base::error is returned.
146  *
147  * @param state Persistent conversion state data.
148  * @param to Start of output buffer.
149  * @param to_end End of output buffer.
150  * @param to_next Returns start of unused output area.
151  * @return codecvt_base::result.
152  */
153  result
154  unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
155  extern_type*& __to_next) const
156  { return this->do_unshift(__state, __to,__to_end,__to_next); }
157 
158  /**
159  * @brief Convert from external to internal character set.
160  *
161  * Converts input string of extern_type to output string of
162  * intern_type. This is analogous to mbsrtowcs. It does this by
163  * calling codecvt::do_in.
164  *
165  * The source and destination character sets are determined by the
166  * facet's locale, internal and external types.
167  *
168  * The characters in [from,from_end) are converted and written to
169  * [to,to_end). from_next and to_next are set to point to the
170  * character following the last successfully converted character,
171  * respectively. If the result needed no conversion, from_next and
172  * to_next are not affected.
173  *
174  * The @a state argument should be initialized if the input is at the
175  * beginning and carried from a previous call if continuing
176  * conversion. There are no guarantees about how @a state is used.
177  *
178  * The result returned is a member of codecvt_base::result. If
179  * all the input is converted, returns codecvt_base::ok. If no
180  * conversion is necessary, returns codecvt_base::noconv. If
181  * the input ends early or there is insufficient space in the
182  * output, returns codecvt_base::partial. Otherwise the
183  * conversion failed and codecvt_base::error is returned.
184  *
185  * @param state Persistent conversion state data.
186  * @param from Start of input.
187  * @param from_end End of input.
188  * @param from_next Returns start of unconverted data.
189  * @param to Start of output buffer.
190  * @param to_end End of output buffer.
191  * @param to_next Returns start of unused output area.
192  * @return codecvt_base::result.
193  */
194  result
195  in(state_type& __state, const extern_type* __from,
196  const extern_type* __from_end, const extern_type*& __from_next,
197  intern_type* __to, intern_type* __to_end,
198  intern_type*& __to_next) const
199  {
200  return this->do_in(__state, __from, __from_end, __from_next,
201  __to, __to_end, __to_next);
202  }
203 
204  int
205  encoding() const throw()
206  { return this->do_encoding(); }
207 
208  bool
209  always_noconv() const throw()
210  { return this->do_always_noconv(); }
211 
212  int
213  length(state_type& __state, const extern_type* __from,
214  const extern_type* __end, size_t __max) const
215  { return this->do_length(__state, __from, __end, __max); }
216 
217  int
218  max_length() const throw()
219  { return this->do_max_length(); }
220 
221  protected:
222  explicit
223  __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
224 
225  virtual
226  ~__codecvt_abstract_base() { }
227 
228  /**
229  * @brief Convert from internal to external character set.
230  *
231  * Converts input string of intern_type to output string of
232  * extern_type. This function is a hook for derived classes to change
233  * the value returned. @see out for more information.
234  */
235  virtual result
236  do_out(state_type& __state, const intern_type* __from,
237  const intern_type* __from_end, const intern_type*& __from_next,
238  extern_type* __to, extern_type* __to_end,
239  extern_type*& __to_next) const = 0;
240 
241  virtual result
242  do_unshift(state_type& __state, extern_type* __to,
243  extern_type* __to_end, extern_type*& __to_next) const = 0;
244 
245  virtual result
246  do_in(state_type& __state, const extern_type* __from,
247  const extern_type* __from_end, const extern_type*& __from_next,
248  intern_type* __to, intern_type* __to_end,
249  intern_type*& __to_next) const = 0;
250 
251  virtual int
252  do_encoding() const throw() = 0;
253 
254  virtual bool
255  do_always_noconv() const throw() = 0;
256 
257  virtual int
258  do_length(state_type&, const extern_type* __from,
259  const extern_type* __end, size_t __max) const = 0;
260 
261  virtual int
262  do_max_length() const throw() = 0;
263  };
264 
265  /// @brief class codecvt [22.2.1.5].
266  /// NB: Generic, mostly useless implementation.
267  template<typename _InternT, typename _ExternT, typename _StateT>
268  class codecvt
269  : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
270  {
271  public:
272  // Types:
273  typedef codecvt_base::result result;
274  typedef _InternT intern_type;
275  typedef _ExternT extern_type;
276  typedef _StateT state_type;
277 
278  protected:
279  __c_locale _M_c_locale_codecvt;
280 
281  public:
282  static locale::id id;
283 
284  explicit // Forwards __refs to the abstract base, which passes it on to locale::facet.
285  codecvt(size_t __refs = 0)
286  : __codecvt_abstract_base<_InternT, _ExternT, _StateT>(__refs) { }
287 
288  explicit
289  codecvt(__c_locale __cloc, size_t __refs = 0);
290 
291  protected:
292  virtual
293  ~codecvt() { }
294 
295  // Overrides of the pure virtual hooks declared in __codecvt_abstract_base.
296  virtual result
296  do_out(state_type& __state, const intern_type* __from,
297  const intern_type* __from_end, const intern_type*& __from_next,
298  extern_type* __to, extern_type* __to_end,
299  extern_type*& __to_next) const;
300 
301  virtual result
302  do_unshift(state_type& __state, extern_type* __to,
303  extern_type* __to_end, extern_type*& __to_next) const;
304 
305  virtual result
306  do_in(state_type& __state, const extern_type* __from,
307  const extern_type* __from_end, const extern_type*& __from_next,
308  intern_type* __to, intern_type* __to_end,
309  intern_type*& __to_next) const;
310 
311  virtual int
312  do_encoding() const throw();
313 
314  virtual bool
315  do_always_noconv() const throw();
316 
317  virtual int
318  do_length(state_type&, const extern_type* __from,
319  const extern_type* __end, size_t __max) const;
320 
321  virtual int
322  do_max_length() const throw();
323  };
324 
325  template<typename _InternT, typename _ExternT, typename _StateT>
326  locale::id codecvt<_InternT, _ExternT, _StateT>::id; // Out-of-class definition of the static id member declared in the generic codecvt.
327 
328  /// class codecvt<char, char, mbstate_t> specialization: declares (without inline bodies) an override for every do_* hook of __codecvt_abstract_base<char, char, mbstate_t>.
329  template<>
330  class codecvt<char, char, mbstate_t>
331  : public __codecvt_abstract_base<char, char, mbstate_t>
332  {
333  public:
334  // Types: the internal and external character types are both char; the state type is mbstate_t.
335  typedef char intern_type;
336  typedef char extern_type;
337  typedef mbstate_t state_type;
338 
339  protected:
340  __c_locale _M_c_locale_codecvt;
341 
342  public:
343  static locale::id id;
344 
345  explicit
346  codecvt(size_t __refs = 0);
347 
348  explicit
349  codecvt(__c_locale __cloc, size_t __refs = 0);
350 
351  protected:
352  virtual
353  ~codecvt();
354 
355  virtual result
356  do_out(state_type& __state, const intern_type* __from,
357  const intern_type* __from_end, const intern_type*& __from_next,
358  extern_type* __to, extern_type* __to_end,
359  extern_type*& __to_next) const;
360 
361  virtual result
362  do_unshift(state_type& __state, extern_type* __to,
363  extern_type* __to_end, extern_type*& __to_next) const;
364 
365  virtual result
366  do_in(state_type& __state, const extern_type* __from,
367  const extern_type* __from_end, const extern_type*& __from_next,
368  intern_type* __to, intern_type* __to_end,
369  intern_type*& __to_next) const;
370 
371  virtual int
372  do_encoding() const throw();
373 
374  virtual bool
375  do_always_noconv() const throw();
376 
377  virtual int
378  do_length(state_type&, const extern_type* __from,
379  const extern_type* __end, size_t __max) const;
380 
381  virtual int
382  do_max_length() const throw();
383  };
384 
385 #ifdef _GLIBCXX_USE_WCHAR_T
386  /// class codecvt<wchar_t, char, mbstate_t> specialization: declares (without inline bodies) an override for every do_* hook of __codecvt_abstract_base<wchar_t, char, mbstate_t>; only present when _GLIBCXX_USE_WCHAR_T is defined.
387  template<>
388  class codecvt<wchar_t, char, mbstate_t>
389  : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
390  {
391  public:
392  // Types: the internal character type is wchar_t, the external one is char; the state type is mbstate_t.
393  typedef wchar_t intern_type;
394  typedef char extern_type;
395  typedef mbstate_t state_type;
396 
397  protected:
398  __c_locale _M_c_locale_codecvt;
399 
400  public:
401  static locale::id id;
402 
403  explicit
404  codecvt(size_t __refs = 0);
405 
406  explicit
407  codecvt(__c_locale __cloc, size_t __refs = 0);
408 
409  protected:
410  virtual
411  ~codecvt();
412 
413  virtual result
414  do_out(state_type& __state, const intern_type* __from,
415  const intern_type* __from_end, const intern_type*& __from_next,
416  extern_type* __to, extern_type* __to_end,
417  extern_type*& __to_next) const;
418 
419  virtual result
420  do_unshift(state_type& __state,
421  extern_type* __to, extern_type* __to_end,
422  extern_type*& __to_next) const;
423 
424  virtual result
425  do_in(state_type& __state,
426  const extern_type* __from, const extern_type* __from_end,
427  const extern_type*& __from_next,
428  intern_type* __to, intern_type* __to_end,
429  intern_type*& __to_next) const;
430 
431  virtual
432  int do_encoding() const throw();
433 
434  virtual
435  bool do_always_noconv() const throw();
436 
437  virtual
438  int do_length(state_type&, const extern_type* __from,
439  const extern_type* __end, size_t __max) const;
440 
441  virtual int
442  do_max_length() const throw();
443  };
444 #endif //_GLIBCXX_USE_WCHAR_T
445 
446  /// class codecvt_byname [22.2.1.6].
447  template<typename _InternT, typename _ExternT, typename _StateT>
448  class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
449  {
450  public:
451  explicit // __s is the locale name; __refs is forwarded to the codecvt base.
452  codecvt_byname(const char* __s, size_t __refs = 0)
453  : codecvt<_InternT, _ExternT, _StateT>(__refs)
454  {
455  if (__builtin_strcmp(__s, "C") != 0
456  && __builtin_strcmp(__s, "POSIX") != 0)
457  { // Any name other than "C" or "POSIX": destroy the inherited C locale handle and create one from __s.
458  this->_S_destroy_c_locale(this->_M_c_locale_codecvt);
459  this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);
460  }
461  }
462 
463  protected:
464  virtual
465  ~codecvt_byname() { }
466  };
467 
468  // Inhibit implicit instantiations for required instantiations,
469  // which are defined via explicit instantiations elsewhere.
470  // NB: This syntax is a GNU extension.
471 #if _GLIBCXX_EXTERN_TEMPLATE
472  extern template class codecvt_byname<char, char, mbstate_t>;
473 
474  extern template
475  const codecvt<char, char, mbstate_t>&
476  use_facet<codecvt<char, char, mbstate_t> >(const locale&);
477 
478  extern template
479  bool
480  has_facet<codecvt<char, char, mbstate_t> >(const locale&);
481 
482 #ifdef _GLIBCXX_USE_WCHAR_T
483  extern template class codecvt_byname<wchar_t, char, mbstate_t>;
484 
485  extern template
486  const codecvt<wchar_t, char, mbstate_t>&
487  use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
488 
489  extern template
490  bool
491  has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
492 #endif
493 #endif
494 
495 _GLIBCXX_END_NAMESPACE
496 
497 #endif // _CODECVT_H
result out(state_type &__state, const intern_type *__from, const intern_type *__from_end, const intern_type *&__from_next, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const
Convert from internal to external character set.
Definition: codecvt.h:115
Facet ID class.
result unshift(state_type &__state, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const
Reset conversion state.
Definition: codecvt.h:154
Extension to use iconv for dealing with character encodings.
class codecvt<char, char, mbstate_t> specialization.
Definition: codecvt.h:330
class codecvt_byname [22.2.1.6].
Definition: codecvt.h:448
Localization functionality base class.
result in(state_type &__state, const extern_type *__from, const extern_type *__from_end, const extern_type *&__from_next, intern_type *__to, intern_type *__to_end, intern_type *&__to_next) const
Convert from external to internal character set.
Definition: codecvt.h:195
Empty base class for codecvt facet [22.2.1.5].
Definition: codecvt.h:45
Container class for localization functionality.
class codecvt [22.2.1.5]. NB: Generic, mostly useless implementation.
Definition: codecvt.h:268
class codecvt<wchar_t, char, mbstate_t> specialization.
Definition: codecvt.h:388
Common base for codecvt functions.
Definition: codecvt.h:67