]>
Commit | Line | Data |
---|---|---|
3c054baf RC |
1 | /* |
2 | * Copyright (c) 2002, Robert Collins. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License as published by | |
6 | * the Free Software Foundation; either version 2 of the License, or | |
7 | * (at your option) any later version. | |
8 | * | |
9 | * A copy of the GNU General Public License can be found at | |
10 | * http://www.gnu.org/ | |
11 | * | |
12 | * Written by Robert Collins. | |
13 | * | |
14 | */ | |
15 | ||
16 | // A String class to replace all the char * manipulation. | |
17 | ||
18 | #include "String++.h" | |
19 | #include <string.h> | |
20 | #include <ctype.h> | |
3c054baf | 21 | #include "io_stream.h" |
6391823e | 22 | #include <iostream> |
15004270 | 23 | #include <sstream> |
b401ef47 | 24 | #include <string> |
3c054baf | 25 | |
6625e635 RC |
26 | using namespace std; |
27 | ||
3c054baf RC |
28 | // _data |
29 | ||
30 | String::_data::_data(_data const &aData) : count (1), theString (new unsigned char[aData.length]), cstr(0), length (aData.length) { | |
31 | memcpy (theString, aData.theString, aData.length); | |
32 | } | |
33 | ||
34 | String::_data::_data(): count (1), theString(new unsigned char[0]), cstr (0), length (0) {} | |
35 | String::_data::_data(size_t aLength): count (1), theString(new unsigned char[aLength]), cstr(0), length (aLength) {} | |
36 | String::_data::~_data () | |
37 | { | |
38 | if (theString) | |
39 | delete[] theString; | |
40 | if (cstr) | |
41 | delete[] cstr; | |
42 | } | |
43 | ||
44 | //String | |
45 | ||
46 | String::String (const char *acString) : theData (new _data(acString ? strlen(acString) : 0)) | |
47 | { | |
48 | memcpy (theData->theString, acString, theData->length); | |
49 | } | |
50 | ||
51 | String::~String () | |
52 | { | |
53 | if (--theData->count == 0) | |
54 | delete theData; | |
55 | } | |
56 | ||
076654e7 RC |
57 | String::String (int const anInt) |
58 | { | |
15004270 | 59 | ostringstream os; |
076654e7 | 60 | os << anInt; |
15004270 RC |
61 | theData = new _data(os.str().size()); |
62 | memcpy (theData->theString, os.str().c_str(), os.str().size()); | |
076654e7 RC |
63 | } |
64 | ||
b401ef47 RC |
65 | String::String (string const &aString) : theData (new _data (aString.c_str() ? strlen (aString.c_str()) : 0)) |
66 | { | |
67 | memcpy (theData->theString, aString.c_str(), theData->length); | |
68 | } | |
69 | ||
3c054baf RC |
70 | // able to cache the result if needed. |
71 | char * | |
72 | String::cstr () | |
73 | { | |
74 | if (theData->length == 0) | |
75 | return NULL; | |
76 | char * tempcStr =new char[theData->length + 1]; | |
77 | // remove when exceptions are done | |
78 | if (!tempcStr) | |
79 | exit (100); | |
80 | tempcStr[theData->length] = '\0'; | |
81 | memcpy (tempcStr, theData->theString, theData->length); | |
82 | return tempcStr; | |
83 | } | |
84 | ||
85 | char * | |
86 | String::cstr () const | |
87 | { | |
88 | if (theData->length == 0) | |
89 | return NULL; | |
90 | char * tempcStr =new char[theData->length + 1]; | |
91 | // remove when exceptions are done | |
92 | if (!tempcStr) | |
93 | exit (100); | |
94 | tempcStr[theData->length] = '\0'; | |
95 | memcpy (tempcStr, theData->theString, theData->length); | |
96 | return tempcStr; | |
97 | } | |
98 | ||
99 | // able to cache the result if needed. | |
100 | char const * | |
101 | String::cstr_oneuse () const | |
102 | { | |
103 | if (theData->length == 0) | |
104 | return NULL; | |
105 | if (theData->cstr) | |
106 | delete[] theData->cstr; | |
107 | theData->cstr = new char[theData->length + 1]; | |
108 | theData->cstr[theData->length] = '\0'; | |
109 | memcpy (theData->cstr, theData->theString, theData->length); | |
110 | return theData->cstr; | |
111 | } | |
112 | ||
113 | // does this character exist in the string? | |
114 | // 0 is false, 1 is the first position... | |
115 | // XXX FIXME: Introduce npos, and change all | |
116 | // if (size) calls to be if (size()==npos) | |
117 | size_t | |
118 | String::find(char aChar) const | |
119 | { | |
120 | for (size_t i=0; i < theData->length; ++i) | |
121 | if (theData->theString[i] == aChar) | |
122 | return i+1; | |
123 | return 0; | |
124 | } | |
125 | ||
b401ef47 | 126 | String |
ad646f43 | 127 | String::substr(size_t start, int len) const |
b401ef47 RC |
128 | { |
129 | // Adapt the C++ string class | |
130 | return string(cstr_oneuse()).substr(start, len); | |
131 | } | |
132 | ||
3c054baf RC |
133 | int |
134 | String::compare (String const &aString, size_t const count) const | |
135 | { | |
136 | // trivial cases: | |
137 | if (theData == aString.theData) | |
138 | return 0; | |
139 | size_t length = count ? count : theData->length; | |
140 | if (length > theData->length) | |
141 | length = theData->length; | |
142 | if (length > aString.theData->length) | |
143 | length = aString.theData->length; | |
144 | size_t i; | |
145 | for (i=0; i < length ; ++i) | |
146 | if (theData->theString[i] < aString.theData->theString[i]) | |
147 | return -1; | |
148 | else if (theData->theString[i] > aString.theData->theString[i]) | |
149 | return 1; | |
150 | // equal for length | |
151 | if (i == count && count != 0) | |
152 | return 0; | |
153 | if (theData->length < aString.theData->length) | |
154 | return -1; | |
155 | else if (theData->length > aString.theData->length) | |
156 | return 1; | |
157 | return 0; | |
158 | } | |
159 | ||
160 | int | |
161 | String::casecompare (String const &aString, size_t const count) const | |
162 | { | |
163 | // trivial cases: | |
164 | if (theData == aString.theData) | |
165 | return 0; | |
166 | size_t length = count ? count : theData->length; | |
167 | if (length > theData->length) | |
168 | length = theData->length; | |
169 | if (length > aString.theData->length) | |
170 | length = aString.theData->length; | |
171 | size_t i; | |
172 | for (i=0; i < length; ++i) | |
173 | if (toupper(theData->theString[i]) < toupper(aString.theData->theString[i])) | |
174 | return -1; | |
175 | else if (toupper(theData->theString[i]) > toupper(aString.theData->theString[i])) | |
176 | return 1; | |
177 | // equal for length | |
178 | if (i == count && count != 0) | |
179 | return 0; | |
180 | if (theData->length < aString.theData->length) | |
181 | return -1; | |
182 | else if (theData->length > aString.theData->length) | |
183 | return 1; | |
184 | return 0; | |
185 | } | |
186 | ||
3c054baf RC |
187 | String & |
188 | String::operator+= (String const &aString) | |
189 | { | |
190 | if (theData->count > 1) | |
191 | { | |
192 | _data * someData = new _data(*theData); | |
193 | --theData->count; | |
194 | theData = someData; | |
195 | } | |
196 | ||
197 | unsigned char *tempString = theData->theString; | |
198 | theData->theString = new unsigned char [theData->length + aString.theData->length]; | |
199 | // remove when exceptions are done | |
200 | if (!theData->theString) | |
201 | exit (100); | |
202 | memcpy (theData->theString, tempString, theData->length); | |
203 | delete[] tempString; | |
204 | memcpy (&theData->theString[theData->length], aString.theData->theString, aString.theData->length); | |
205 | theData->length += aString.theData->length; | |
206 | return *this; | |
207 | } | |
208 | ||
209 | String | |
210 | String::operator + (String const &aString) const | |
211 | { | |
212 | unsigned char *tempcString = new unsigned char [theData->length + aString.theData->length]; | |
213 | // remove when exceptions are done | |
214 | if (!tempcString) | |
215 | exit (100); | |
216 | memcpy (tempcString, theData->theString, theData->length); | |
217 | memcpy (&tempcString[theData->length], aString.theData->theString, aString.theData->length); | |
218 | return absorb (tempcString, theData->length + aString.theData->length); | |
219 | } | |
220 | ||
221 | String | |
222 | String::operator + (char const *aString) const | |
223 | { | |
224 | // expensive, but quick to code. | |
225 | return *this + String (aString); | |
226 | } | |
227 | ||
228 | bool | |
229 | String::operator == (String const &rhs) const | |
230 | { | |
231 | return compare (rhs) ? false : true; | |
232 | } | |
233 | ||
234 | bool | |
235 | String::operator == (char const *rhs) const | |
236 | { | |
237 | return compare (rhs) ? false : true; | |
238 | } | |
239 | ||
69711722 RC |
240 | bool |
241 | String::operator != (String const &rhs) const | |
242 | { | |
243 | return !(*this == rhs); | |
244 | } | |
245 | ||
246 | bool | |
247 | String::operator != (char const *rhs) const | |
248 | { | |
249 | return !(*this == rhs); | |
250 | } | |
251 | ||
3c054baf RC |
252 | String |
253 | String::absorb (unsigned char *aString, size_t aLength) | |
254 | { | |
255 | String theString; | |
256 | theString.theData->theString = aString; | |
257 | theString.theData->length = aLength; | |
258 | return theString; | |
259 | } | |
260 | ||
261 | int | |
262 | String::casecompare (String const lhs, String const rhs) | |
263 | { | |
264 | return lhs.casecompare (rhs); | |
265 | } | |
6391823e | 266 | |
864a5ec1 MB |
/*
 * Glob-style matching of name against pattern; both are <buffer,length>
 * strings and need not be NUL-terminated.
 *
 * Supported syntax:
 *   '?'      matches exactly one character
 *   '*'      matches any number of characters (including none)
 *   '[...]'  character set, with 'a-z' ranges; a leading '^' inverts it.
 *            The first character of the set is always taken literally.
 *   '\'      escapes the following character
 *
 * Returns true only when the whole of name is matched by the whole of
 * pattern. Malformed patterns (unterminated '[', trailing '\') never match.
 * Derived from fileutils-4.1.
 */
static bool
strmatch (const unsigned char *pattern, size_t plen,
          const unsigned char *name, size_t nlen)
{
  const unsigned char *const pend = pattern + plen;
  const unsigned char *const nend = name + nlen;
  const unsigned char *p = pattern;
  const unsigned char *n = name;

  while (p < pend)
    {
      unsigned char c = *p++;
      switch (c)
        {
        case '?':		/* A '?' matches exactly one character */
          if (n == nend)
            return false;
          break;

        case '\\':		/* Escape next character */
          if (p == pend)
            return false;
          c = *p++;
          if (n == nend || *n != c)
            return false;
          break;

        case '*':		/* A '*' matches any number of characters */
          {
            /* Fold the run of wildcards that follows.  Every '?' in the run
               consumes one character of the name, so "*???" cannot match
               fewer than three characters.  */
            while (p < pend && (*p == '?' || *p == '*'))
              {
                if (*p == '?')
                  {
                    if (n == nend)
                      return false;
                    ++n;
                  }
                ++p;
              }

            /* Nothing but wildcards remained: the rest of the name matches. */
            if (p == pend)
              return true;

            /* p now addresses the first non-wildcard pattern character.
               c1 is the literal a candidate position must start with; for
               an escape it is the escaped character.  */
            c = *p;
            unsigned char c1 = c;
            if (c == '\\')
              {
                if (p + 1 == pend)
                  /* Trailing backslash: malformed pattern. */
                  return false;
                c1 = p[1];
              }

            /* Try the remaining pattern at every plausible position.  A
               '[' set cannot be pre-filtered, so every position is tried. */
            for (; n != nend; ++n)
              if ((c == '[' || *n == c1)
                  && strmatch (p, pend - p, n, nend - n))
                return true;
            return false;
          }

        case '[':		/* A '[A-Z]' matches any char between 'A' and 'Z' */
          {
            if (n == nend)
              return false;

            /* True if the sense of the character class is inverted. */
            bool const invert = (p < pend && *p == '^');
            if (invert)
              ++p;

            if (p == pend)
              /* [ (unterminated) loses. */
              return false;

            bool matched = false;
            c = *p++;
            for (;;)
              {
                unsigned char const cstart = c;
                unsigned char cend = c;

                if (p == pend)
                  /* [ (unterminated) loses. */
                  return false;

                c = *p++;

                /* "x-y" is a range unless the '-' is the last set member. */
                if (c == '-' && p < pend && *p != ']')
                  {
                    cend = *p++;
                    if (p == pend)
                      /* [x-y (unterminated) loses. */
                      return false;
                    c = *p++;
                  }

                if (*n >= cstart && *n <= cend)
                  {
                    matched = true;
                    break;
                  }

                if (c == ']')
                  break;
              }

            if (matched)
              {
                /* Skip the rest of the [...] that already matched. */
                while (c != ']')
                  {
                    if (p == pend)
                      /* [... (unterminated) loses. */
                      return false;
                    c = *p++;
                  }
              }

            /* A hit in an inverted set, or a miss in a normal one, fails. */
            if (matched == invert)
              return false;
          }
          break;

        default:
          if (n == nend || c != *n)
            return false;
        }

      /* Every case that reaches here consumed one character of the name. */
      ++n;
    }

  /* Pattern exhausted: it is a match only if the name is exhausted too. */
  return n == nend;
}
402 | ||
403 | bool | |
404 | String::matches (String const &pattern) const | |
405 | { | |
406 | return strmatch (pattern.theData->theString, pattern.theData->length, | |
407 | theData->theString, theData->length); | |
408 | } | |
409 | ||
25e21380 IP |
410 | String |
411 | String::replace (char pattern, char replacement) const | |
412 | { | |
413 | unsigned char *tempcString = new unsigned char [theData->length]; | |
414 | // remove when exceptions are done | |
415 | if (!tempcString) | |
416 | exit (100); | |
417 | unsigned char *s = theData->theString; | |
418 | unsigned char *d = tempcString; | |
419 | unsigned char *end = theData->theString + theData->length; | |
420 | for (s = theData->theString; s < end; ++s) | |
421 | { | |
422 | if (*s == pattern) | |
423 | *d++ = replacement; | |
424 | else | |
425 | *d++ = *s; | |
426 | } | |
427 | return absorb (tempcString, theData->length); | |
428 | } | |
429 | ||
430 | String | |
431 | String::replace (String const &pattern, String const &replacement) const | |
432 | { | |
433 | int growth = replacement.theData->length - pattern.theData->length + 1; | |
434 | if (growth < 1) growth = 1; | |
435 | unsigned char *tempcString = new unsigned char [theData->length * growth]; | |
436 | // remove when exceptions are done | |
437 | if (!tempcString) | |
438 | exit (100); | |
439 | unsigned char *s = theData->theString; | |
440 | unsigned char *d = tempcString; | |
441 | unsigned char *end = theData->theString + theData->length; | |
442 | for (s = theData->theString; s < end - pattern.theData->length; ) | |
443 | { | |
444 | if (memcmp(s, pattern.theData->theString, pattern.theData->length) == 0) | |
445 | { | |
446 | s += pattern.theData->length; | |
447 | memcpy(d, replacement.theData->theString, replacement.theData->length); | |
448 | d += replacement.theData->length; | |
449 | } | |
450 | else | |
451 | *d++ = *s++; | |
452 | } | |
453 | for (; s < end; ) | |
454 | *d++ = *s++; | |
455 | size_t length = d - tempcString; | |
456 | // Avoid wasting space | |
457 | unsigned char *newCopy = new unsigned char[length]; | |
458 | // remove when exceptions are done | |
459 | if (!newCopy) | |
460 | exit (100); | |
461 | memcpy (newCopy, tempcString, length); | |
462 | delete[] tempcString; | |
463 | ||
464 | return absorb (newCopy, length); | |
465 | } | |
466 | ||
6391823e RC |
467 | /* TODO: research how wide char and unicode interoperate with |
468 | * C++ streams | |
469 | */ | |
470 | ostream & | |
471 | operator << (ostream &os, String const &theString) | |
472 | { | |
473 | os << theString.cstr_oneuse(); | |
474 | return os; | |
475 | } |