1 | #if 0 |
---|
2 | liblilxml |
---|
3 | Copyright (C) 2003 Elwood C. Downey |
---|
4 | |
---|
5 | This library is free software; you can redistribute it and/or |
---|
6 | modify it under the terms of the GNU Lesser General Public |
---|
7 | License as published by the Free Software Foundation; either |
---|
8 | version 2.1 of the License, or (at your option) any later version. |
---|
9 | |
---|
10 | This library is distributed in the hope that it will be useful, |
---|
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
13 | Lesser General Public License for more details. |
---|
14 | |
---|
15 | You should have received a copy of the GNU Lesser General Public |
---|
16 | License along with this library; if not, write to the Free Software |
---|
17 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
---|
18 | |
---|
19 | #endif |
---|
20 | |
---|
21 | /* little DOM-style XML parser. |
---|
22 | * only handles elements, attributes and pcdata content. |
---|
23 | * <! ... > and <? ... > are silently ignored. |
---|
24 | * pcdata is collected into one string, sans leading whitespace first line. |
---|
25 | * |
---|
26 | * #define MAIN_TST to create standalone test program |
---|
27 | */ |
---|
28 | |
---|
29 | #include <stdlib.h> |
---|
30 | #include <string.h> |
---|
31 | #include <ctype.h> |
---|
32 | |
---|
33 | #include "lilxml.h" |
---|
34 | |
---|
/* growable, malloc-backed character buffer used for tags, names, values
 * and pcdata.
 */
typedef struct {
    char *s;			/* malloced storage holding the text */
    int sl;			/* text length, not counting the trailing \0 */
    int sm;			/* total number of bytes malloced at s */
} String;
#define	MINMEM		64	/* starting string length */
---|
42 | |
---|
43 | static int oneXMLchar (LilXML *lp, int c, char errmsg[]); |
---|
44 | static void initParser(LilXML *lp); |
---|
45 | static void pushXMLEle(LilXML *lp); |
---|
46 | static void popXMLEle(LilXML *lp); |
---|
47 | static void resetEndTag(LilXML *lp); |
---|
48 | static XMLAtt *growAtt(XMLEle *e); |
---|
49 | static XMLEle *growEle(XMLEle *pe); |
---|
50 | static void freeAtt (XMLAtt *a); |
---|
51 | static int isTokenChar (int start, int c); |
---|
52 | static void growString (String *sp, int c); |
---|
53 | static void appendString (String *sp, const char *str); |
---|
54 | static void freeString (String *sp); |
---|
55 | static void newString (String *sp); |
---|
56 | static void *moremem (void *old, int n); |
---|
57 | |
---|
/* states of the character-at-a-time parser driven by oneXMLchar() */
typedef enum {
    LOOK4START = 0,		/* waiting for the '<' of the first element */
    LOOK4TAG,			/* waiting for first char of an element tag */
    INTAG,			/* collecting an element tag */
    LOOK4ATTRN,			/* waiting for an attr name, '>' or '/' */
    INATTRN,			/* collecting an attr name */
    LOOK4ATTRV,			/* waiting for the opening quote of a value */
    SAWSLASH,			/* saw '/' inside an opening tag */
    INATTRV,			/* collecting an attr value */
    ENTINATTRV,			/* collecting an entity inside an attr value */
    LOOK4CON,			/* skipping whitespace ahead of content */
    INCON,			/* collecting content (pcdata) */
    ENTINCON,			/* collecting an entity inside pcdata */
    SAWLTINCON,			/* saw '<' while in content */
    LOOK4CLOSETAG,		/* waiting for first char of a closing tag */
    INCLOSETAG			/* collecting a closing tag */
} State;
---|
75 | |
---|
76 | /* maintain state while parsing */ |
---|
77 | struct _LilXML { |
---|
78 | State cs; /* current state */ |
---|
79 | int ln; /* line number for diags */ |
---|
80 | XMLEle *ce; /* current element being built */ |
---|
81 | String endtag; /* to check for match with opening tag*/ |
---|
82 | String entity; /* collect entity seq */ |
---|
83 | int delim; /* attribute value delimiter */ |
---|
84 | int lastc; /* last char (just used wiht skipping)*/ |
---|
85 | int skipping; /* in comment or declaration */ |
---|
86 | }; |
---|
87 | |
---|
88 | /* internal representation of a (possibly nested) XML element */ |
---|
89 | struct _xml_ele { |
---|
90 | String tag; /* element tag */ |
---|
91 | XMLEle *pe; /* parent element, or NULL if root */ |
---|
92 | XMLAtt **at; /* list of attributes */ |
---|
93 | int nat; /* number of attributes */ |
---|
94 | int ait; /* used to iterate over at[] */ |
---|
95 | XMLEle **el; /* list of child elements */ |
---|
96 | int nel; /* number of child elements */ |
---|
97 | int eit; /* used to iterate over el[] */ |
---|
98 | String pcdata; /* character data in this element */ |
---|
99 | int pcdata_hasent; /* 1 if pcdata contains an entity char*/ |
---|
100 | }; |
---|
101 | |
---|
102 | /* internal representation of an attribute */ |
---|
103 | struct _xml_att { |
---|
104 | String name; /* name */ |
---|
105 | String valu; /* value */ |
---|
106 | XMLEle *ce; /* containing element */ |
---|
107 | }; |
---|
108 | |
---|
/* the characters that must be written as entity sequences when they occur
 * in attribute values or pcdata
 */
static char entities[] = "&<>'\"";

/* allocator hooks used for all memory here; they start out as the C library
 * routines and can be replaced with indi_xmlMalloc()
 */
static void *(*mymalloc)(size_t size) = malloc;
static void *(*myrealloc)(void *ptr, size_t size) = realloc;
static void (*myfree)(void *ptr) = free;
---|
117 | |
---|
118 | /* install new version of malloc/realloc/free. |
---|
119 | * N.B. don't call after first use of any other lilxml function |
---|
120 | */ |
---|
121 | void |
---|
122 | indi_xmlMalloc (void *(*newmalloc)(size_t size), |
---|
123 | void *(*newrealloc)(void *ptr, size_t size), |
---|
124 | void (*newfree)(void *ptr)) |
---|
125 | { |
---|
126 | mymalloc = newmalloc; |
---|
127 | myrealloc = newrealloc; |
---|
128 | myfree = newfree; |
---|
129 | } |
---|
130 | |
---|
131 | /* pass back a fresh handle for use with our other functions */ |
---|
132 | LilXML * |
---|
133 | newLilXML () |
---|
134 | { |
---|
135 | LilXML *lp = (LilXML *) moremem (NULL, sizeof(LilXML)); |
---|
136 | memset (lp, 0, sizeof(LilXML)); |
---|
137 | initParser(lp); |
---|
138 | return (lp); |
---|
139 | } |
---|
140 | |
---|
141 | /* discard */ |
---|
142 | void |
---|
143 | delLilXML (LilXML *lp) |
---|
144 | { |
---|
145 | delXMLEle (lp->ce); |
---|
146 | freeString (&lp->endtag); |
---|
147 | (*myfree) (lp); |
---|
148 | } |
---|
149 | |
---|
150 | /* delete ep and all its children and remove from parent's list if known */ |
---|
151 | void |
---|
152 | delXMLEle (XMLEle *ep) |
---|
153 | { |
---|
154 | int i; |
---|
155 | |
---|
156 | /* benign if NULL */ |
---|
157 | if (!ep) |
---|
158 | return; |
---|
159 | |
---|
160 | /* delete all parts of ep */ |
---|
161 | freeString (&ep->tag); |
---|
162 | freeString (&ep->pcdata); |
---|
163 | if (ep->at) { |
---|
164 | for (i = 0; i < ep->nat; i++) |
---|
165 | freeAtt (ep->at[i]); |
---|
166 | (*myfree) (ep->at); |
---|
167 | } |
---|
168 | if (ep->el) { |
---|
169 | for (i = 0; i < ep->nel; i++) { |
---|
170 | /* forget parent so deleting doesn't modify _this_ el[] */ |
---|
171 | ep->el[i]->pe = NULL; |
---|
172 | |
---|
173 | delXMLEle (ep->el[i]); |
---|
174 | } |
---|
175 | (*myfree) (ep->el); |
---|
176 | } |
---|
177 | |
---|
178 | /* remove from parent's list if known */ |
---|
179 | if (ep->pe) { |
---|
180 | XMLEle *pe = ep->pe; |
---|
181 | for (i = 0; i < pe->nel; i++) { |
---|
182 | if (pe->el[i] == ep) { |
---|
183 | memmove (&pe->el[i], &pe->el[i+1], |
---|
184 | (--pe->nel-i)*sizeof(XMLEle*)); |
---|
185 | break; |
---|
186 | } |
---|
187 | } |
---|
188 | } |
---|
189 | |
---|
190 | /* delete ep itself */ |
---|
191 | (*myfree) (ep); |
---|
192 | } |
---|
193 | |
---|
194 | /* process one more character of an XML file. |
---|
195 | * when find closure with outter element return root of complete tree. |
---|
196 | * when find error return NULL with reason in errmsg[]. |
---|
197 | * when need more return NULL with errmsg[0] = '\0'. |
---|
198 | * N.B. it is up to the caller to delete any tree returned with delXMLEle(). |
---|
199 | */ |
---|
200 | XMLEle * |
---|
201 | readXMLEle (LilXML *lp, int newc, char errmsg[]) |
---|
202 | { |
---|
203 | XMLEle *root; |
---|
204 | int s; |
---|
205 | |
---|
206 | /* start optimistic */ |
---|
207 | errmsg[0] = '\0'; |
---|
208 | |
---|
209 | /* EOF? */ |
---|
210 | if (newc == 0) { |
---|
211 | sprintf (errmsg, "Line %d: early XML EOF", lp->ln); |
---|
212 | initParser(lp); |
---|
213 | return (NULL); |
---|
214 | } |
---|
215 | |
---|
216 | /* new line? */ |
---|
217 | if (newc == '\n') |
---|
218 | lp->ln++; |
---|
219 | |
---|
220 | /* skip comments and declarations. requires 1 char history */ |
---|
221 | if (!lp->skipping && lp->lastc == '<' && (newc == '?' || newc == '!')) { |
---|
222 | lp->skipping = 1; |
---|
223 | lp->lastc = newc; |
---|
224 | return (NULL); |
---|
225 | } |
---|
226 | if (lp->skipping) { |
---|
227 | if (newc == '>') |
---|
228 | lp->skipping = 0; |
---|
229 | lp->lastc = newc; |
---|
230 | return (NULL); |
---|
231 | } |
---|
232 | if (newc == '<') { |
---|
233 | lp->lastc = '<'; |
---|
234 | return (NULL); |
---|
235 | } |
---|
236 | |
---|
237 | /* do a pending '<' first then newc */ |
---|
238 | if (lp->lastc == '<') { |
---|
239 | if (oneXMLchar (lp, '<', errmsg) < 0) { |
---|
240 | initParser(lp); |
---|
241 | return (NULL); |
---|
242 | } |
---|
243 | /* N.B. we assume '<' will never result in closure */ |
---|
244 | } |
---|
245 | |
---|
246 | /* process newc (at last!) */ |
---|
247 | s = oneXMLchar (lp, newc, errmsg); |
---|
248 | if (s == 0) { |
---|
249 | lp->lastc = newc; |
---|
250 | return (NULL); |
---|
251 | } |
---|
252 | if (s < 0) { |
---|
253 | initParser(lp); |
---|
254 | return (NULL); |
---|
255 | } |
---|
256 | |
---|
257 | /* Ok! return ce and we start over. |
---|
258 | * N.B. up to caller to call delXMLEle with what we return. |
---|
259 | */ |
---|
260 | root = lp->ce; |
---|
261 | lp->ce = NULL; |
---|
262 | initParser(lp); |
---|
263 | return (root); |
---|
264 | } |
---|
265 | |
---|
266 | /* search ep for an attribute with given name. |
---|
267 | * return NULL if not found. |
---|
268 | */ |
---|
269 | XMLAtt * |
---|
270 | findXMLAtt (XMLEle *ep, const char *name) |
---|
271 | { |
---|
272 | int i; |
---|
273 | |
---|
274 | for (i = 0; i < ep->nat; i++) |
---|
275 | if (!strcmp (ep->at[i]->name.s, name)) |
---|
276 | return (ep->at[i]); |
---|
277 | return (NULL); |
---|
278 | } |
---|
279 | |
---|
280 | /* search ep for an element with given tag. |
---|
281 | * return NULL if not found. |
---|
282 | */ |
---|
283 | XMLEle * |
---|
284 | findXMLEle (XMLEle *ep, const char *tag) |
---|
285 | { |
---|
286 | int tl = strlen (tag); |
---|
287 | int i; |
---|
288 | |
---|
289 | for (i = 0; i < ep->nel; i++) { |
---|
290 | String *sp = &ep->el[i]->tag; |
---|
291 | if (sp->sl == tl && !strcmp (sp->s, tag)) |
---|
292 | return (ep->el[i]); |
---|
293 | } |
---|
294 | return (NULL); |
---|
295 | } |
---|
296 | |
---|
297 | /* iterate over each child element of ep. |
---|
298 | * call first time with first set to 1, then 0 from then on. |
---|
299 | * returns NULL when no more or err |
---|
300 | */ |
---|
301 | XMLEle * |
---|
302 | nextXMLEle (XMLEle *ep, int init) |
---|
303 | { |
---|
304 | int eit; |
---|
305 | |
---|
306 | if (init) |
---|
307 | ep->eit = 0; |
---|
308 | |
---|
309 | eit = ep->eit++; |
---|
310 | if (eit < 0 || eit >= ep->nel) |
---|
311 | return (NULL); |
---|
312 | return (ep->el[eit]); |
---|
313 | } |
---|
314 | |
---|
315 | /* iterate over each attribute of ep. |
---|
316 | * call first time with first set to 1, then 0 from then on. |
---|
317 | * returns NULL when no more or err |
---|
318 | */ |
---|
319 | XMLAtt * |
---|
320 | nextXMLAtt (XMLEle *ep, int init) |
---|
321 | { |
---|
322 | int ait; |
---|
323 | |
---|
324 | if (init) |
---|
325 | ep->ait = 0; |
---|
326 | |
---|
327 | ait = ep->ait++; |
---|
328 | if (ait < 0 || ait >= ep->nat) |
---|
329 | return (NULL); |
---|
330 | return (ep->at[ait]); |
---|
331 | } |
---|
332 | |
---|
333 | /* return parent of given XMLEle */ |
---|
334 | XMLEle * |
---|
335 | parentXMLEle (XMLEle *ep) |
---|
336 | { |
---|
337 | return (ep->pe); |
---|
338 | } |
---|
339 | |
---|
340 | /* return parent element of given XMLAtt */ |
---|
341 | XMLEle * |
---|
342 | parentXMLAtt (XMLAtt *ap) |
---|
343 | { |
---|
344 | return (ap->ce); |
---|
345 | } |
---|
346 | |
---|
347 | /* access functions */ |
---|
348 | |
---|
349 | /* return the tag name of the given element */ |
---|
350 | char * |
---|
351 | tagXMLEle (XMLEle *ep) |
---|
352 | { |
---|
353 | return (ep->tag.s); |
---|
354 | } |
---|
355 | |
---|
356 | /* return the pcdata portion of the given element */ |
---|
357 | char * |
---|
358 | pcdataXMLEle (XMLEle *ep) |
---|
359 | { |
---|
360 | return (ep->pcdata.s); |
---|
361 | } |
---|
362 | |
---|
363 | /* return the number of characters in the pcdata portion of the given element */ |
---|
364 | int |
---|
365 | pcdatalenXMLEle (XMLEle *ep) |
---|
366 | { |
---|
367 | return (ep->pcdata.sl); |
---|
368 | } |
---|
369 | |
---|
370 | /* return the name of the given attribute */ |
---|
371 | char * |
---|
372 | nameXMLAtt (XMLAtt *ap) |
---|
373 | { |
---|
374 | return (ap->name.s); |
---|
375 | } |
---|
376 | |
---|
377 | /* return the value of the given attribute */ |
---|
378 | char * |
---|
379 | valuXMLAtt (XMLAtt *ap) |
---|
380 | { |
---|
381 | return (ap->valu.s); |
---|
382 | } |
---|
383 | |
---|
384 | /* return the number of child elements of the given element */ |
---|
385 | int |
---|
386 | nXMLEle (XMLEle *ep) |
---|
387 | { |
---|
388 | return (ep->nel); |
---|
389 | } |
---|
390 | |
---|
391 | /* return the number of attributes in the given element */ |
---|
392 | int |
---|
393 | nXMLAtt (XMLEle *ep) |
---|
394 | { |
---|
395 | return (ep->nat); |
---|
396 | } |
---|
397 | |
---|
398 | |
---|
399 | /* search ep for an attribute with the given name and return its value. |
---|
400 | * return "" if not found. |
---|
401 | */ |
---|
402 | const char * |
---|
403 | findXMLAttValu (XMLEle *ep, const char *name) |
---|
404 | { |
---|
405 | XMLAtt *a = findXMLAtt (ep, name); |
---|
406 | return (a ? a->valu.s : ""); |
---|
407 | } |
---|
408 | |
---|
409 | /* handy wrapper to read one xml file. |
---|
410 | * return root element else NULL with report in errmsg[] |
---|
411 | */ |
---|
412 | XMLEle * |
---|
413 | readXMLFile (FILE *fp, LilXML *lp, char errmsg[]) |
---|
414 | { |
---|
415 | int c; |
---|
416 | |
---|
417 | while ((c = fgetc(fp)) != EOF) { |
---|
418 | XMLEle *root = readXMLEle (lp, c, errmsg); |
---|
419 | if (root || errmsg[0]) |
---|
420 | return (root); |
---|
421 | } |
---|
422 | |
---|
423 | return (NULL); |
---|
424 | } |
---|
425 | |
---|
426 | /* add an element with the given tag to the given element. |
---|
427 | * parent can be NULL to make a new root. |
---|
428 | */ |
---|
429 | XMLEle * |
---|
430 | addXMLEle (XMLEle *parent, const char *tag) |
---|
431 | { |
---|
432 | XMLEle *ep = growEle (parent); |
---|
433 | appendString (&ep->tag, tag); |
---|
434 | return (ep); |
---|
435 | } |
---|
436 | |
---|
437 | /* set the pcdata of the given element */ |
---|
438 | void |
---|
439 | editXMLEle (XMLEle *ep, const char *pcdata) |
---|
440 | { |
---|
441 | freeString (&ep->pcdata); |
---|
442 | appendString (&ep->pcdata, pcdata); |
---|
443 | ep->pcdata_hasent = (strpbrk (pcdata, entities) != NULL); |
---|
444 | } |
---|
445 | |
---|
446 | /* add an attribute to the given XML element */ |
---|
447 | XMLAtt * |
---|
448 | addXMLAtt (XMLEle *ep, const char *name, const char *valu) |
---|
449 | { |
---|
450 | XMLAtt *ap = growAtt (ep); |
---|
451 | appendString (&ap->name, name); |
---|
452 | appendString (&ap->valu, valu); |
---|
453 | return (ap); |
---|
454 | } |
---|
455 | |
---|
456 | /* remove the named attribute from ep, if any */ |
---|
457 | void |
---|
458 | rmXMLAtt (XMLEle *ep, const char *name) |
---|
459 | { |
---|
460 | int i; |
---|
461 | |
---|
462 | for (i = 0; i < ep->nat; i++) { |
---|
463 | if (strcmp (ep->at[i]->name.s, name) == 0) { |
---|
464 | freeAtt (ep->at[i]); |
---|
465 | memmove (&ep->at[i],&ep->at[i+1],(--ep->nat-i)*sizeof(XMLAtt*)); |
---|
466 | return; |
---|
467 | } |
---|
468 | } |
---|
469 | } |
---|
470 | |
---|
471 | /* change the value of an attribute to str */ |
---|
472 | void |
---|
473 | editXMLAtt (XMLAtt *ap, const char *str) |
---|
474 | { |
---|
475 | freeString (&ap->valu); |
---|
476 | appendString (&ap->valu, str); |
---|
477 | } |
---|
478 | |
---|
479 | /* sample print ep to fp |
---|
480 | * N.B. set level = 0 on first call |
---|
481 | */ |
---|
482 | #define PRINDENT 4 /* sample print indent each level */ |
---|
483 | void |
---|
484 | prXMLEle (FILE *fp, XMLEle *ep, int level) |
---|
485 | { |
---|
486 | int indent = level*PRINDENT; |
---|
487 | int i; |
---|
488 | |
---|
489 | fprintf (fp, "%*s<%s", indent, "", ep->tag.s); |
---|
490 | for (i = 0; i < ep->nat; i++) |
---|
491 | fprintf (fp, " %s=\"%s\"", ep->at[i]->name.s, |
---|
492 | entityXML(ep->at[i]->valu.s)); |
---|
493 | if (ep->nel > 0) { |
---|
494 | fprintf (fp, ">\n"); |
---|
495 | for (i = 0; i < ep->nel; i++) |
---|
496 | prXMLEle (fp, ep->el[i], level+1); |
---|
497 | } |
---|
498 | if (ep->pcdata.sl > 0) { |
---|
499 | if (ep->nel == 0) |
---|
500 | fprintf (fp, ">\n"); |
---|
501 | if (ep->pcdata_hasent) |
---|
502 | fprintf (fp, "%s", entityXML(ep->pcdata.s)); |
---|
503 | else |
---|
504 | fprintf (fp, "%s", ep->pcdata.s); |
---|
505 | if (ep->pcdata.s[ep->pcdata.sl-1] != '\n') |
---|
506 | fprintf (fp, "\n"); |
---|
507 | } |
---|
508 | if (ep->nel > 0 || ep->pcdata.sl > 0) |
---|
509 | fprintf (fp, "%*s</%s>\n", indent, "", ep->tag.s); |
---|
510 | else |
---|
511 | fprintf (fp, "/>\n"); |
---|
512 | } |
---|
513 | |
---|
514 | /* sample print ep to string s. |
---|
515 | * N.B. s must be at least as large as that reported by sprlXMLEle()+1. |
---|
516 | * N.B. set level = 0 on first call |
---|
517 | * return length of resulting string (sans trailing \0) |
---|
518 | */ |
---|
519 | int |
---|
520 | sprXMLEle (char *s, XMLEle *ep, int level) |
---|
521 | { |
---|
522 | int indent = level*PRINDENT; |
---|
523 | int sl = 0; |
---|
524 | int i; |
---|
525 | |
---|
526 | sl += sprintf (s+sl, "%*s<%s", indent, "", ep->tag.s); |
---|
527 | for (i = 0; i < ep->nat; i++) |
---|
528 | sl += sprintf (s+sl, " %s=\"%s\"", ep->at[i]->name.s, |
---|
529 | entityXML(ep->at[i]->valu.s)); |
---|
530 | if (ep->nel > 0) { |
---|
531 | sl += sprintf (s+sl, ">\n"); |
---|
532 | for (i = 0; i < ep->nel; i++) |
---|
533 | sl += sprXMLEle (s+sl, ep->el[i], level+1); |
---|
534 | } |
---|
535 | if (ep->pcdata.sl > 0) { |
---|
536 | if (ep->nel == 0) |
---|
537 | sl += sprintf (s+sl, ">\n"); |
---|
538 | if (ep->pcdata_hasent) |
---|
539 | sl += sprintf (s+sl, "%s", entityXML(ep->pcdata.s)); |
---|
540 | else { |
---|
541 | strcpy (s+sl, ep->pcdata.s); |
---|
542 | sl += ep->pcdata.sl; |
---|
543 | } |
---|
544 | if (ep->pcdata.s[ep->pcdata.sl-1] != '\n') |
---|
545 | sl += sprintf (s+sl, "\n"); |
---|
546 | } |
---|
547 | if (ep->nel > 0 || ep->pcdata.sl > 0) |
---|
548 | sl += sprintf (s+sl, "%*s</%s>\n", indent, "", ep->tag.s); |
---|
549 | else |
---|
550 | sl += sprintf (s+sl, "/>\n"); |
---|
551 | |
---|
552 | return (sl); |
---|
553 | } |
---|
554 | |
---|
555 | /* return number of bytes in a string guaranteed able to hold result of |
---|
556 | * sprXLMEle(ep) (sans trailing \0). |
---|
557 | * N.B. set level = 0 on first call |
---|
558 | */ |
---|
559 | int |
---|
560 | sprlXMLEle (XMLEle *ep, int level) |
---|
561 | { |
---|
562 | int indent = level*PRINDENT; |
---|
563 | int l = 0; |
---|
564 | int i; |
---|
565 | |
---|
566 | l += indent + 1 + ep->tag.sl; |
---|
567 | for (i = 0; i < ep->nat; i++) |
---|
568 | l += ep->at[i]->name.sl + 4 + strlen(entityXML(ep->at[i]->valu.s)); |
---|
569 | |
---|
570 | if (ep->nel > 0) { |
---|
571 | l += 2; |
---|
572 | for (i = 0; i < ep->nel; i++) |
---|
573 | l += sprlXMLEle (ep->el[i], level+1); |
---|
574 | } |
---|
575 | if (ep->pcdata.sl > 0) { |
---|
576 | if (ep->nel == 0) |
---|
577 | l += 2; |
---|
578 | if (ep->pcdata_hasent) |
---|
579 | l += strlen (entityXML(ep->pcdata.s)); |
---|
580 | else |
---|
581 | l += ep->pcdata.sl; |
---|
582 | if (ep->pcdata.s[ep->pcdata.sl-1] != '\n') |
---|
583 | l += 1; |
---|
584 | } |
---|
585 | if (ep->nel > 0 || ep->pcdata.sl > 0) |
---|
586 | l += indent + 4 + ep->tag.sl; |
---|
587 | else |
---|
588 | l += 3; |
---|
589 | |
---|
590 | return (l); |
---|
591 | } |
---|
592 | |
---|
593 | /* return a string with all xml-sensitive characters within the passed string s |
---|
594 | * replaced with their entity sequence equivalents. |
---|
595 | * N.B. caller must use the returned string before calling us again. |
---|
596 | */ |
---|
597 | char * |
---|
598 | entityXML (char *s) |
---|
599 | { |
---|
600 | static char *malbuf; |
---|
601 | int nmalbuf = 0; |
---|
602 | char *sret; |
---|
603 | char *ep; |
---|
604 | |
---|
605 | /* scan for each entity, if any */ |
---|
606 | for (sret = s; (ep = strpbrk (s, entities)) != NULL; s = ep+1) { |
---|
607 | |
---|
608 | /* found another entity, copy preceding to malloced buffer */ |
---|
609 | int nnew = ep - s; /* all but entity itself */ |
---|
610 | sret = malbuf = moremem (malbuf, nmalbuf + nnew + 10); |
---|
611 | memcpy (malbuf+nmalbuf, s, nnew); |
---|
612 | nmalbuf += nnew; |
---|
613 | |
---|
614 | /* replace with entity encoding */ |
---|
615 | switch (*ep) { |
---|
616 | case '&': |
---|
617 | nmalbuf += sprintf (malbuf+nmalbuf, "&"); |
---|
618 | break; |
---|
619 | case '<': |
---|
620 | nmalbuf += sprintf (malbuf+nmalbuf, "<"); |
---|
621 | break; |
---|
622 | case '>': |
---|
623 | nmalbuf += sprintf (malbuf+nmalbuf, ">"); |
---|
624 | break; |
---|
625 | case '\'': |
---|
626 | nmalbuf += sprintf (malbuf+nmalbuf, "'"); |
---|
627 | break; |
---|
628 | case '"': |
---|
629 | nmalbuf += sprintf (malbuf+nmalbuf, """); |
---|
630 | break; |
---|
631 | |
---|
632 | } |
---|
633 | |
---|
634 | } |
---|
635 | |
---|
636 | /* return s if no entities, else malloc cleaned-up copy */ |
---|
637 | if (sret == s) { |
---|
638 | /* using s, so free any malloced memory from last time */ |
---|
639 | if (malbuf) { |
---|
640 | free (malbuf); |
---|
641 | malbuf = NULL; |
---|
642 | } |
---|
643 | } else { |
---|
644 | /* put remaining part of s into malbuf */ |
---|
645 | int nleft = strlen (s) + 1; /* include \0 */ |
---|
646 | sret = malbuf = moremem (malbuf, nmalbuf + nleft); |
---|
647 | memcpy (malbuf+nmalbuf, s, nleft); |
---|
648 | } |
---|
649 | |
---|
650 | return (sret); |
---|
651 | } |
---|
652 | |
---|
/* if ent is one of the predefined xml entity sequences, written in full
 * from the leading '&' through the trailing ';', set *cp to the character
 * it stands for and return 1.
 * else return 0 and leave *cp alone.
 */
static int
decodeEntity (const char *ent, int *cp)
{
	static const struct {
	    const char *ent;	/* sequence as it appears in the document */
	    char c;		/* character it represents */
	} enttable[] = {
	    {"&amp;",  '&'},
	    {"&apos;", '\''},
	    {"&lt;",   '<'},
	    {"&gt;",   '>'},
	    {"&quot;", '"'},
	};
	unsigned int i;

	for (i = 0; i < sizeof(enttable)/sizeof(enttable[0]); i++) {
	    if (strcmp (ent, enttable[i].ent) == 0) {
		*cp = enttable[i].c;
		return (1);
	    }
	}

	return (0);
}
---|
680 | |
---|
681 | /* process one more char in XML file. |
---|
682 | * if find final closure, return 1 and tree is in ce. |
---|
683 | * if need more, return 0. |
---|
684 | * if real trouble, return -1 and put reason in errmsg. |
---|
685 | */ |
---|
686 | static int |
---|
687 | oneXMLchar (LilXML *lp, int c, char errmsg[]) |
---|
688 | { |
---|
689 | switch (lp->cs) { |
---|
690 | case LOOK4START: /* looking for first element start */ |
---|
691 | if (c == '<') { |
---|
692 | pushXMLEle(lp); |
---|
693 | lp->cs = LOOK4TAG; |
---|
694 | } |
---|
695 | /* silently ignore until resync */ |
---|
696 | break; |
---|
697 | |
---|
698 | case LOOK4TAG: /* looking for element tag */ |
---|
699 | if (isTokenChar (1, c)) { |
---|
700 | growString (&lp->ce->tag, c); |
---|
701 | lp->cs = INTAG; |
---|
702 | } else if (!isspace(c)) { |
---|
703 | sprintf (errmsg, "Line %d: Bogus tag char %c", lp->ln, c); |
---|
704 | return (-1); |
---|
705 | } |
---|
706 | break; |
---|
707 | |
---|
708 | case INTAG: /* reading tag */ |
---|
709 | if (isTokenChar (0, c)) |
---|
710 | growString (&lp->ce->tag, c); |
---|
711 | else if (c == '>') |
---|
712 | lp->cs = LOOK4CON; |
---|
713 | else if (c == '/') |
---|
714 | lp->cs = SAWSLASH; |
---|
715 | else |
---|
716 | lp->cs = LOOK4ATTRN; |
---|
717 | break; |
---|
718 | |
---|
719 | case LOOK4ATTRN: /* looking for attr name, > or / */ |
---|
720 | if (c == '>') |
---|
721 | lp->cs = LOOK4CON; |
---|
722 | else if (c == '/') |
---|
723 | lp->cs = SAWSLASH; |
---|
724 | else if (isTokenChar (1, c)) { |
---|
725 | XMLAtt *ap = growAtt(lp->ce); |
---|
726 | growString (&ap->name, c); |
---|
727 | lp->cs = INATTRN; |
---|
728 | } else if (!isspace(c)) { |
---|
729 | sprintf (errmsg, "Line %d: Bogus leading attr name char: %c", |
---|
730 | lp->ln, c); |
---|
731 | return (-1); |
---|
732 | } |
---|
733 | break; |
---|
734 | |
---|
735 | case SAWSLASH: /* saw / in element opening */ |
---|
736 | if (c == '>') { |
---|
737 | if (!lp->ce->pe) |
---|
738 | return(1); /* root has no content */ |
---|
739 | popXMLEle(lp); |
---|
740 | lp->cs = LOOK4CON; |
---|
741 | } else { |
---|
742 | sprintf (errmsg, "Line %d: Bogus char %c before >", lp->ln, c); |
---|
743 | return (-1); |
---|
744 | } |
---|
745 | break; |
---|
746 | |
---|
747 | case INATTRN: /* reading attr name */ |
---|
748 | if (isTokenChar (0, c)) |
---|
749 | growString (&lp->ce->at[lp->ce->nat-1]->name, c); |
---|
750 | else if (isspace(c) || c == '=') |
---|
751 | lp->cs = LOOK4ATTRV; |
---|
752 | else { |
---|
753 | sprintf (errmsg, "Line %d: Bogus attr name char: %c", lp->ln,c); |
---|
754 | return (-1); |
---|
755 | } |
---|
756 | break; |
---|
757 | |
---|
758 | case LOOK4ATTRV: /* looking for attr value */ |
---|
759 | if (c == '\'' || c == '"') { |
---|
760 | lp->delim = c; |
---|
761 | lp->cs = INATTRV; |
---|
762 | } else if (!(isspace(c) || c == '=')) { |
---|
763 | sprintf (errmsg, "Line %d: No value for attribute %s", lp->ln, |
---|
764 | lp->ce->at[lp->ce->nat-1]->name.s); |
---|
765 | return (-1); |
---|
766 | } |
---|
767 | break; |
---|
768 | |
---|
769 | case INATTRV: /* in attr value */ |
---|
770 | if (c == '&') { |
---|
771 | newString (&lp->entity); |
---|
772 | growString (&lp->entity, c); |
---|
773 | lp->cs = ENTINATTRV; |
---|
774 | } else if (c == lp->delim) |
---|
775 | lp->cs = LOOK4ATTRN; |
---|
776 | else if (!iscntrl(c)) |
---|
777 | growString (&lp->ce->at[lp->ce->nat-1]->valu, c); |
---|
778 | break; |
---|
779 | |
---|
780 | case ENTINATTRV: /* working on entity in attr valu */ |
---|
781 | if (c == ';') { |
---|
782 | /* if find a recongized esp seq, add equiv char else raw seq */ |
---|
783 | growString (&lp->entity, c); |
---|
784 | if (decodeEntity (lp->entity.s, &c)) |
---|
785 | growString (&lp->ce->at[lp->ce->nat-1]->valu, c); |
---|
786 | else |
---|
787 | appendString(&lp->ce->at[lp->ce->nat-1]->valu,lp->entity.s); |
---|
788 | freeString (&lp->entity); |
---|
789 | lp->cs = INATTRV; |
---|
790 | } else |
---|
791 | growString (&lp->entity, c); |
---|
792 | break; |
---|
793 | |
---|
794 | case LOOK4CON: /* skipping leading content whitespace*/ |
---|
795 | if (c == '<') |
---|
796 | lp->cs = SAWLTINCON; |
---|
797 | else if (!isspace(c)) { |
---|
798 | growString (&lp->ce->pcdata, c); |
---|
799 | lp->cs = INCON; |
---|
800 | } |
---|
801 | break; |
---|
802 | |
---|
803 | case INCON: /* reading content */ |
---|
804 | if (c == '&') { |
---|
805 | newString (&lp->entity); |
---|
806 | growString (&lp->entity, c); |
---|
807 | lp->cs = ENTINCON; |
---|
808 | } else if (c == '<') { |
---|
809 | /* chomp trailing whitespace */ |
---|
810 | while (lp->ce->pcdata.sl > 0 && |
---|
811 | isspace(lp->ce->pcdata.s[lp->ce->pcdata.sl-1])) |
---|
812 | lp->ce->pcdata.s[--(lp->ce->pcdata.sl)] = '\0'; |
---|
813 | lp->cs = SAWLTINCON; |
---|
814 | } else { |
---|
815 | growString (&lp->ce->pcdata, c); |
---|
816 | } |
---|
817 | break; |
---|
818 | |
---|
819 | case ENTINCON: /* working on entity in content */ |
---|
820 | if (c == ';') { |
---|
821 | /* if find a recognized esc seq, add equiv char else raw seq */ |
---|
822 | growString (&lp->entity, c); |
---|
823 | if (decodeEntity (lp->entity.s, &c)) |
---|
824 | growString (&lp->ce->pcdata, c); |
---|
825 | else { |
---|
826 | appendString(&lp->ce->pcdata, lp->entity.s); |
---|
827 | lp->ce->pcdata_hasent = 1; |
---|
828 | } |
---|
829 | freeString (&lp->entity); |
---|
830 | lp->cs = INCON; |
---|
831 | } else |
---|
832 | growString (&lp->entity, c); |
---|
833 | break; |
---|
834 | |
---|
835 | case SAWLTINCON: /* saw < in content */ |
---|
836 | if (c == '/') { |
---|
837 | resetEndTag(lp); |
---|
838 | lp->cs = LOOK4CLOSETAG; |
---|
839 | } else { |
---|
840 | pushXMLEle(lp); |
---|
841 | if (isTokenChar(1,c)) { |
---|
842 | growString (&lp->ce->tag, c); |
---|
843 | lp->cs = INTAG; |
---|
844 | } else |
---|
845 | lp->cs = LOOK4TAG; |
---|
846 | } |
---|
847 | break; |
---|
848 | |
---|
849 | case LOOK4CLOSETAG: /* looking for closing tag after < */ |
---|
850 | if (isTokenChar (1, c)) { |
---|
851 | growString (&lp->endtag, c); |
---|
852 | lp->cs = INCLOSETAG; |
---|
853 | } else if (!isspace(c)) { |
---|
854 | sprintf (errmsg, "Line %d: Bogus preend tag char %c", lp->ln,c); |
---|
855 | return (-1); |
---|
856 | } |
---|
857 | break; |
---|
858 | |
---|
859 | case INCLOSETAG: /* reading closing tag */ |
---|
860 | if (isTokenChar(0, c)) |
---|
861 | growString (&lp->endtag, c); |
---|
862 | else if (c == '>') { |
---|
863 | if (strcmp (lp->ce->tag.s, lp->endtag.s)) { |
---|
864 | sprintf (errmsg,"Line %d: closing tag %s does not match %s", |
---|
865 | lp->ln, lp->endtag.s, lp->ce->tag.s); |
---|
866 | return (-1); |
---|
867 | } else if (lp->ce->pe) { |
---|
868 | popXMLEle(lp); |
---|
869 | lp->cs = LOOK4CON; /* back to content after nested elem */ |
---|
870 | } else |
---|
871 | return (1); /* yes! */ |
---|
872 | } else if (!isspace(c)) { |
---|
873 | sprintf (errmsg, "Line %d: Bogus end tag char %c", lp->ln, c); |
---|
874 | return (-1); |
---|
875 | } |
---|
876 | break; |
---|
877 | } |
---|
878 | |
---|
879 | return (0); |
---|
880 | } |
---|
881 | |
---|
882 | /* set up for a fresh start again */ |
---|
883 | static void |
---|
884 | initParser(LilXML *lp) |
---|
885 | { |
---|
886 | delXMLEle (lp->ce); |
---|
887 | freeString (&lp->endtag); |
---|
888 | memset (lp, 0, sizeof(*lp)); |
---|
889 | newString (&lp->endtag); |
---|
890 | lp->cs = LOOK4START; |
---|
891 | lp->ln = 1; |
---|
892 | } |
---|
893 | |
---|
894 | /* start a new XMLEle. |
---|
895 | * point ce to a new XMLEle. |
---|
896 | * if ce already set up, add to its list of child elements too. |
---|
897 | * endtag no longer valid. |
---|
898 | */ |
---|
899 | static void |
---|
900 | pushXMLEle(LilXML *lp) |
---|
901 | { |
---|
902 | lp->ce = growEle (lp->ce); |
---|
903 | resetEndTag(lp); |
---|
904 | } |
---|
905 | |
---|
906 | /* point ce to parent of current ce. |
---|
907 | * endtag no longer valid. |
---|
908 | */ |
---|
909 | static void |
---|
910 | popXMLEle(LilXML *lp) |
---|
911 | { |
---|
912 | lp->ce = lp->ce->pe; |
---|
913 | resetEndTag(lp); |
---|
914 | } |
---|
915 | |
---|
916 | /* return one new XMLEle, added to the given element if given */ |
---|
917 | static XMLEle * |
---|
918 | growEle (XMLEle *pe) |
---|
919 | { |
---|
920 | XMLEle *newe = (XMLEle *) moremem (NULL, sizeof(XMLEle)); |
---|
921 | |
---|
922 | memset (newe, 0, sizeof(XMLEle)); |
---|
923 | newString (&newe->tag); |
---|
924 | newString (&newe->pcdata); |
---|
925 | newe->pe = pe; |
---|
926 | |
---|
927 | if (pe) { |
---|
928 | pe->el = (XMLEle **) moremem (pe->el, (pe->nel+1)*sizeof(XMLEle *)); |
---|
929 | pe->el[pe->nel++] = newe; |
---|
930 | } |
---|
931 | |
---|
932 | return (newe); |
---|
933 | } |
---|
934 | |
---|
935 | /* add room for and return one new XMLAtt to the given element */ |
---|
936 | static XMLAtt * |
---|
937 | growAtt(XMLEle *ep) |
---|
938 | { |
---|
939 | XMLAtt *newa = (XMLAtt *) moremem (NULL, sizeof(XMLAtt)); |
---|
940 | |
---|
941 | memset (newa, 0, sizeof(*newa)); |
---|
942 | newString(&newa->name); |
---|
943 | newString(&newa->valu); |
---|
944 | newa->ce = ep; |
---|
945 | |
---|
946 | ep->at = (XMLAtt **) moremem (ep->at, (ep->nat+1)*sizeof(XMLAtt *)); |
---|
947 | ep->at[ep->nat++] = newa; |
---|
948 | |
---|
949 | return (newa); |
---|
950 | } |
---|
951 | |
---|
952 | /* free a and all it holds */ |
---|
953 | static void |
---|
954 | freeAtt (XMLAtt *a) |
---|
955 | { |
---|
956 | if (!a) |
---|
957 | return; |
---|
958 | freeString (&a->name); |
---|
959 | freeString (&a->valu); |
---|
960 | (*myfree)(a); |
---|
961 | } |
---|
962 | |
---|
963 | /* reset endtag */ |
---|
964 | static void |
---|
965 | resetEndTag(LilXML *lp) |
---|
966 | { |
---|
967 | freeString (&lp->endtag); |
---|
968 | newString (&lp->endtag); |
---|
969 | } |
---|
970 | |
---|
/* 1 if c is a valid token character, else 0.
 * a letter or '_' is always accepted; a digit is accepted too unless start
 * is set, i.e. anywhere but in the first position of a token.
 * c is narrowed to unsigned char before it is classified: the <ctype.h>
 * functions are only defined for EOF and values representable as unsigned
 * char, so a caller handing in a plain (possibly negative) char would
 * otherwise invoke undefined behavior.
 */
static int
isTokenChar (int start, int c)
{
    const unsigned char uc = (unsigned char)c;

    if (c == '_' || isalpha(uc))
        return (1);

    return (!start && isdigit(uc));
}
---|
979 | |
---|
980 | /* grow the String storage at *sp to append c */ |
---|
981 | static void |
---|
982 | growString (String *sp, int c) |
---|
983 | { |
---|
984 | int l = sp->sl + 2; /* need room for '\0' plus c */ |
---|
985 | |
---|
986 | if (l > sp->sm) { |
---|
987 | if (!sp->s) |
---|
988 | newString (sp); |
---|
989 | else |
---|
990 | sp->s = (char *) moremem (sp->s, sp->sm *= 2); |
---|
991 | } |
---|
992 | sp->s[--l] = '\0'; |
---|
993 | sp->s[--l] = (char)c; |
---|
994 | sp->sl++; |
---|
995 | } |
---|
996 | |
---|
997 | /* append str to the String storage at *sp */ |
---|
998 | static void |
---|
999 | appendString (String *sp, const char *str) |
---|
1000 | { |
---|
1001 | int strl = strlen (str); |
---|
1002 | int l = sp->sl + strl + 1; /* need room for '\0' */ |
---|
1003 | |
---|
1004 | if (l > sp->sm) { |
---|
1005 | if (!sp->s) |
---|
1006 | newString (sp); |
---|
1007 | if (l > sp->sm) |
---|
1008 | sp->s = (char *) moremem (sp->s, (sp->sm = l)); |
---|
1009 | } |
---|
1010 | strcpy (&sp->s[sp->sl], str); |
---|
1011 | sp->sl += strl; |
---|
1012 | } |
---|
1013 | |
---|
1014 | /* init a String with a malloced string containing just \0 */ |
---|
1015 | static void |
---|
1016 | newString(String *sp) |
---|
1017 | { |
---|
1018 | sp->s = (char *)moremem(NULL, MINMEM); |
---|
1019 | sp->sm = MINMEM; |
---|
1020 | *sp->s = '\0'; |
---|
1021 | sp->sl = 0; |
---|
1022 | } |
---|
1023 | |
---|
1024 | /* free memory used by the given String */ |
---|
1025 | static void |
---|
1026 | freeString (String *sp) |
---|
1027 | { |
---|
1028 | if (sp->s) |
---|
1029 | (*myfree) (sp->s); |
---|
1030 | sp->s = NULL; |
---|
1031 | sp->sl = 0; |
---|
1032 | sp->sm = 0; |
---|
1033 | } |
---|
1034 | |
---|
1035 | /* like malloc but knows to use realloc if already started */ |
---|
1036 | static void * |
---|
1037 | moremem (void *old, int n) |
---|
1038 | { |
---|
1039 | return (old ? (*myrealloc)(old, n) : (*mymalloc)(n)); |
---|
1040 | } |
---|
1041 | |
---|
#if defined(MAIN_TST)
/* stand-alone test driver, only built when MAIN_TST is defined.
 * reads XML from stdin with readXMLFile(); when a root element comes back
 * it is printed to stdout with prXMLEle() and again from a string filled in
 * by sprXMLEle(), with progress notes on stderr. if any command line
 * argument is given, a "theend" child with some pcdata and a hello="world"
 * attribute is first added to the root to exercise the editing calls.
 * when no root comes back, a non-empty errmsg is reported on stderr.
 * always returns 0.
 */
int
main (int ac, char *av[])
{
    LilXML *lp = newLilXML();
    char errmsg[1024] = "";     /* empty, so the errmsg[0] test below is
                                 * defined even if nothing ever writes it
                                 */
    XMLEle *root;

    root = readXMLFile (stdin, lp, errmsg);
    if (root) {
        char *str;
        int l;

        if (ac > 1) {
            XMLEle *theend = addXMLEle (root, "theend");
            editXMLEle (theend, "Added to test editing");
            addXMLAtt (theend, "hello", "world");
        }

        fprintf (stderr, "::::::::::::: %s\n", tagXMLEle(root));
        prXMLEle (stdout, root, 0);

        /* print again by way of a malloced string; l+1 bytes on the
         * assumption sprlXMLEle() does not count the trailing '\0'
         * (TODO confirm)
         */
        l = sprlXMLEle (root, 0);
        str = malloc (l+1);
        if (str) {
            fprintf (stderr, "::::::::::::: %s : %d : %d",
                            tagXMLEle(root), l, sprXMLEle (str, root, 0));
            fprintf (stderr, ": %d\n", printf ("%s", str));
            free (str);
        } else
            fprintf (stderr, "Error: no memory for %d bytes\n", l+1);

        delXMLEle (root);
    } else if (errmsg[0]) {
        fprintf (stderr, "Error: %s\n", errmsg);
    }

    delLilXML (lp);

    return (0);
}
#endif
---|
1080 | |
---|