/* libsmu.c */
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/*
 * This is a modified version of smu to work with adonis-ssg
 * (c) 2007-2014 Enno Boland <g s01 de> (c) 2020-2023 kqueue <kqueue@cocaine.ninja>
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING
 * ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
 * THIS SOFTWARE.
 */

/* Number of elements of a true array (not a pointer). */
#define LENGTH(x) (sizeof(x) / sizeof((x)[0]))

/* Stream every parser writes to; set by smu_convert(). */
FILE *output;

/*
 * A parser inspects the text at `begin` (the range ends at `end`, exclusive).
 * Return value, as interpreted by process():
 *   0   -> no match, nothing consumed
 *   > 0 -> that many bytes were consumed
 *   < 0 -> that many bytes (absolute value) were consumed and the text that
 *          follows starts a new block
 */
typedef int (*Parser) (const char *, const char *, int);

/* Markup rule: `search` is the marker, `before`/`after` the emitted HTML. */
typedef struct {
	const char *search;
	int process;		/* 0: escape content; 1: process inline; 2: process as block */
	const char *before, *after;
} Tag;

void eprint(const char *format,...);

static char *addc(char *buf, size_t i, char c);
static int doamp(const char *begin, const char *end, int newblock);	/* Parser for & */
static int docomment(const char *begin, const char *end, int newblock);	/* Parser for html-comments */
static int dogtlt(const char *begin, const char *end, int newblock);	/* Parser for < and > */
static int dohtml(const char *begin, const char *end, int newblock);	/* Parser for html */
static int dolineprefix(const char *begin, const char *end, int newblock);	/* Parser for line prefix tags */
static int dolink(const char *begin, const char *end, int newblock);	/* Parser for links and images */
static int dolist(const char *begin, const char *end, int newblock);	/* Parser for lists */
static int doparagraph(const char *begin, const char *end, int newblock);	/* Parser for paragraphs */
static int doreplace(const char *begin, const char *end, int newblock);	/* Parser for simple replaces */
static int doshortlink(const char *begin, const char *end, int newblock);	/* Parser for <url> and <mail> links */
static int dosurround(const char *begin, const char *end, int newblock);	/* Parser for surrounding tags */
static int dounderline(const char *begin, const char *end, int newblock);	/* Parser for underline tags */
static void *ereallocz(void *p, size_t size);
static void hprint(const char *begin, const char *end);	/* escapes HTML and prints it to output */
static void process(const char *begin, const char *end, int newblock);	/* Processes range between begin and end. */

/* list of parsers, tried in this order at every position */
static Parser parsers[] = {dounderline, docomment, dolineprefix,
	dolist, doparagraph, dogtlt, dosurround, dolink,
	doshortlink, dohtml, doamp, doreplace};

/* When non-zero, raw HTML in the input is escaped instead of passed through. */
static int nohtml = 0;

static Tag lineprefix[] = {
	/* Four-space code prefix, per Markdown convention. */
	{"    ", 0, "<pre><code>", "\n</code></pre>"},
	{"\t", 0, "<pre><code>", "\n</code></pre>"},
	{">", 2, "<blockquote>", "</blockquote>"},
	{"###### ", 1, "<h6>", "</h6>"},
	{"##### ", 1, "<h5>", "</h5>"},
	{"#### ", 1, "<h4>", "</h4>"},
	{"### ", 1, "<h3>", "</h3>"},
	{"## ", 1, "<h2>", "</h2>"},
	{"# ", 1, "<h1>", "</h1>"},
	{"- - -\n", 1, "<hr />", ""},
};

static Tag underline[] = {
	{"=", 1, "<h1>", "</h1>\n"},
	{"-", 1, "<h2>", "</h2>\n"},
};

static Tag surround[] = {
	{"``", 0, "<code>", "</code>"},
	{"`", 0, "<code>", "</code>"},
	{"___", 1, "<strong><em>", "</em></strong>"},
	{"***", 1, "<strong><em>", "</em></strong>"},
	{"__", 1, "<strong>", "</strong>"},
	{"**", 1, "<strong>", "</strong>"},
	{"_", 1, "<em>", "</em>"},
	{"*", 1, "<em>", "</em>"},
};

/* Backslash escapes: {input, output}. */
static const char *replace[][2] = {
	{"\\\\", "\\"},
	{"\\`", "`"},
	{"\\*", "*"},
	{"\\_", "_"},
	{"\\{", "{"},
	{"\\}", "}"},
	{"\\[", "["},
	{"\\]", "]"},
	{"\\(", "("},
	{"\\)", ")"},
	{"\\#", "#"},
	{"\\+", "+"},
	{"\\-", "-"},
	{"\\.", "."},
	{"\\!", "!"},
};

/* Text inserted in front of a pattern; the pattern itself is not consumed. */
static const char *insert[][2] = {
	/* Two trailing spaces before a newline: hard line break. */
	{"  \n", "<br />"},
};

/* Print a printf-style message to stderr and terminate with EXIT_FAILURE. */
void
eprint(const char *format,...)
{
	va_list ap;

	va_start(ap, format);
	vfprintf(stderr, format, ap);
	va_end(ap);
	exit(EXIT_FAILURE);
}

/*
 * Store `c` at buf[i], growing the buffer by BUFSIZ bytes whenever `i` is a
 * multiple of BUFSIZ. Callers must write indices in sequence so that every
 * multiple of BUFSIZ passes through here. Returns the (possibly moved)
 * buffer; exits via eprint() when out of memory.
 */
static char *
addc(char *buf, size_t i, char c)
{
	if (i % BUFSIZ == 0) {
		char *grown = realloc(buf, i + BUFSIZ);

		if (!grown)
			eprint("Malloc failed.");
		buf = grown;
	}
	buf[i] = c;
	return buf;
}

/*
 * Escape a lone '&'. In HTML mode, an '&' that runs up to a ';' (or to the
 * end of the range) without whitespace or backslash is taken for an entity
 * and left for process() to copy through unchanged.
 */
static int
doamp(const char *begin, const char *end, int newblock)
{
	const char *p;

	if (*begin != '&')
		return 0;
	if (!nohtml) {
		for (p = begin + 1; p != end && !strchr("; \\\n\t", *p); p++);
		if (p == end || *p == ';')
			return 0;
	}
	fputs("&amp;", output);
	return 1;
}

/*
 * Escape '<' when it is not followed by a letter, and '>' when it is not
 * preceded by a letter, '/', or a quote. Disabled when nohtml is set
 * (process() then escapes every unmatched byte through hprint()).
 */
static int
dogtlt(const char *begin, const char *end, int newblock)
{
	int brpos;
	char c;

	if (nohtml || begin + 1 >= end)
		return 0;
	brpos = begin[1] == '>';
	if (!brpos && *begin != '<')
		return 0;
	c = begin[brpos ? 0 : 1];
	if (!brpos && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z')) {
		fputs("&lt;", output);
		return 1;
	} else if (brpos && (c < 'a' || c > 'z') && (c < 'A' || c > 'Z') && !strchr("/\"'", c)) {
		fprintf(output, "%c&gt;", c);
		return 2;
	}
	return 0;
}

/* Copy an HTML comment through verbatim, followed by a newline. */
static int
docomment(const char *begin, const char *end, int newblock)
{
	const char *p;

	if (nohtml || strncmp("<!--", begin, 4))
		return 0;
	p = strstr(begin, "-->");
	if (!p || p + 3 >= end)
		return 0;
	fprintf(output, "%.*s\n", (int) (p + 3 - begin), begin);
	return (int) (p + 3 - begin) * (newblock ? -1 : 1);
}

/*
 * Copy raw HTML through verbatim: either a whole element up to its matching
 * closing tag, or a single tag up to '>'. One byte after the closing '>' is
 * consumed as well when it lies inside the range.
 */
static int
dohtml(const char *begin, const char *end, int newblock)
{
	const char *p, *tag, *tagend;
	size_t taglen, len;
	const size_t avail = (size_t) (end - begin);

	if (nohtml || begin + 2 >= end)
		return 0;
	p = begin;
	/* <ctype.h> functions require an unsigned char value */
	if (p[0] != '<' || !isalpha((unsigned char) p[1]))
		return 0;
	p++;
	tag = p;
	for (; p < end && isalnum((unsigned char) *p); p++);
	tagend = p;
	if (tag == tagend)
		return 0;
	taglen = (size_t) (tagend - tag);
	while ((p = strstr(p, "</")) && p < end) {
		p += 2;
		if (strncmp(p, tag, taglen) == 0 && p[taglen] == '>') {
			if (p + taglen >= end)
				break;	/* closing '>' lies outside the range */
			len = (size_t) (p - begin) + taglen + 2;
			/* never emit or consume bytes beyond the range */
			if (len > avail)
				len = avail;
			fwrite(begin, sizeof(char), len, output);
			return (int) len;
		}
	}
	p = strchr(tagend, '>');
	if (p && p < end) {
		len = (size_t) (p - begin) + 2;
		if (len > avail)
			len = avail;
		fwrite(begin, sizeof(char), len, output);
		return (int) len;
	}
	return 0;
}

/*
 * Handle constructs introduced by a prefix at the start of a line (code
 * blocks, blockquotes, ATX headings, horizontal rule). Consecutive lines
 * carrying the same prefix are collected, the prefix is stripped, and the
 * result is escaped or processed according to the tag.
 */
static int
dolineprefix(const char *begin, const char *end, int newblock)
{
	unsigned int i, j;
	size_t l;
	char *buffer;
	const char *p;

	if (newblock)
		p = begin;
	else if (*begin == '\n')
		p = begin + 1;
	else
		return 0;
	for (i = 0; i < LENGTH(lineprefix); i++) {
		const Tag *tag = &lineprefix[i];
		int isrule;

		l = strlen(tag->search);
		if ((size_t) (end - p) < l)
			continue;
		if (strncmp(tag->search, p, l))
			continue;
		/* a search string ending in '\n' is a complete one-line rule */
		isrule = tag->search[l - 1] == '\n';
		/* a bare prefix with nothing after it is not a block */
		if (!isrule && p + l >= end)
			continue;
		if (*begin == '\n')
			fputc('\n', output);
		fputs(tag->before, output);
		if (isrule) {
			fputc('\n', output);
			/* leave the trailing newline; count a skipped leading one */
			return (int) (p - begin) + (int) l - 1;
		}
		if (!(buffer = malloc(BUFSIZ)))
			eprint("Malloc failed.");
		buffer[0] = '\0';

		/* Collect lines into buffer while they start with the prefix */
		j = 0;
		while (p + l < end && strncmp(tag->search, p, l) == 0) {
			p += l;

			/* Special case for blockquotes: optional space after > */
			if (tag->search[0] == '>' && *p == ' ')
				p++;
			while (p < end) {
				buffer = addc(buffer, j, *p);
				j++;
				if (*(p++) == '\n')
					break;
			}
		}

		/* Drop trailing newlines; stop at the start of the buffer */
		while (j > 0 && buffer[j - 1] == '\n')
			j--;

		buffer = addc(buffer, j, '\0');
		if (tag->process)
			process(buffer, buffer + strlen(buffer), tag->process >= 2);
		else
			hprint(buffer, buffer + strlen(buffer));
		fputs(tag->after, output);
		fputc('\n', output);
		free(buffer);
		return -(int) (p - begin);
	}
	return 0;
}

/*
 * Parse "[desc](url "title")" links and "![alt](url "title")" images.
 * Nested parentheses in the URL are balanced; the title is optional and may
 * be quoted with " or '; the URL may be wrapped in angle brackets.
 */
static int
dolink(const char *begin, const char *end, int newblock)
{
	int img, len, sep, parens_depth = 1;
	const char *desc, *link, *p, *q, *descend, *linkend;
	const char *title = NULL, *titleend = NULL;

	if (*begin == '[')
		img = 0;
	else if (strncmp(begin, "![", 2) == 0)
		img = 1;
	else
		return 0;
	desc = begin + 1 + img;
	if (!(p = strstr(desc, "](")) || p >= end)
		return 0;
	/* every image nested in the description owns one "](" of its own */
	for (q = strstr(desc, "!["); q && q < end && q < p; q = strstr(q + 1, "!["))
		if (!(p = strstr(p + 1, "](")) || p >= end)
			return 0;
	descend = p;
	link = p + 2;

	/* find end of link while handling nested parens */
	q = link;
	while (parens_depth) {
		/* a paren at or beyond `end` is outside this range */
		if (!(q = strpbrk(q, "()")) || q >= end)
			return 0;
		if (*q == '(')
			parens_depth++;
		else
			parens_depth--;
		if (parens_depth)
			q++;
	}

	if ((p = strpbrk(link, "\"'")) && p < end && q > p) {
		sep = p[0];	/* separator: can be " or ' */
		title = p + 1;
		/* strip trailing whitespace */
		for (linkend = p; linkend > link && isspace((unsigned char) linkend[-1]); linkend--);
		for (titleend = q - 1; titleend > link && isspace((unsigned char) *titleend); titleend--);
		/* the title must be closed by a second separator */
		if (titleend < title || *titleend != sep)
			return 0;
	} else {
		linkend = q;
	}

	/* Links can be given in angular brackets */
	if (*link == '<' && *(linkend - 1) == '>') {
		link++;
		linkend--;
	}
	len = (int) (q + 1 - begin);
	if (img) {
		fputs("<img src=\"", output);
		hprint(link, linkend);
		fputs("\" alt=\"", output);
		hprint(desc, descend);
		fputs("\" ", output);
		if (title && titleend) {
			fputs("title=\"", output);
			hprint(title, titleend);
			fputs("\" ", output);
		}
		fputs("/>", output);
	} else {
		fputs("<a href=\"", output);
		hprint(link, linkend);
		fputs("\"", output);
		if (title && titleend) {
			fputs(" title=\"", output);
			hprint(title, titleend);
			fputs("\"", output);
		}
		fputs(">", output);
		process(desc, descend, 0);
		fputs("</a>", output);
	}
	return len;
}

/*
 * Parse unordered ("-", "*", "+") and ordered ("1.") lists. Each item is
 * collected into a buffer with its indentation removed and then processed
 * recursively; an item is processed as a block when empty lines were seen.
 */
static int
dolist(const char *begin, const char *end, int newblock)
{
	unsigned int i, j, indent, run, ul, isblock;
	const char *p, *q;
	char *buffer = NULL;
	char marker = '\0';	/* bullet character; only meaningful when ul is set */

	isblock = 0;
	if (newblock)
		p = begin;
	else if (*begin == '\n')
		p = begin + 1;
	else
		return 0;
	q = p;
	if (*p == '-' || *p == '*' || *p == '+') {
		ul = 1;
		marker = *p;
	} else {
		ul = 0;
		for (; p < end && *p >= '0' && *p <= '9'; p++);
		if (p >= end || *p != '.')
			return 0;
	}
	p++;
	if (p >= end || !(*p == ' ' || *p == '\t'))
		return 0;
	for (p++; p != end && (*p == ' ' || *p == '\t'); p++);
	/* width of marker plus following whitespace */
	indent = (unsigned int) (p - q);
	buffer = ereallocz(buffer, BUFSIZ);
	if (!newblock)
		fputc('\n', output);
	fputs(ul ? "<ul>\n" : "<ol>\n", output);
	run = 1;
	for (; p < end && run; p++) {
		for (i = 0; p < end && run; p++, i++) {
			if (*p == '\n') {
				if (p + 1 == end)
					break;
				else {
					/* Handle empty lines */
					for (q = p + 1; q < end && (*q == ' ' || *q == '\t'); q++);
					if (*q == '\n') {
						buffer = addc(buffer, i, '\n');
						i++;
						run = 0;
						isblock++;
						p = q;
					}
				}
				q = p + 1;
				j = 0;
				if (ul && *q == marker)
					j = 1;
				else if (!ul) {
					for (; q + j != end && q[j] >= '0' && q[j] <= '9' && j < indent; j++);
					if (q + j == end)
						break;
					if (j > 0 && q[j] == '.')
						j++;
					else
						j = 0;
				}
				if (q + indent < end)
					for (; (q[j] == ' ' || q[j] == '\t') && j < indent; j++);
				if (j == indent) {
					buffer = addc(buffer, i, '\n');
					i++;
					p += indent;
					run = 1;
					if (*q == ' ' || *q == '\t')
						p++;	/* continuation line of the same item */
					else
						break;	/* next item begins */
				} else if (j < indent)
					run = 0;
			}
			buffer = addc(buffer, i, *p);
		}
		buffer = addc(buffer, i, '\0');
		fputs("<li>", output);
		process(buffer, buffer + i, isblock > 1 || (isblock == 1 && run));
		fputs("</li>\n", output);
	}
	fputs(ul ? "</ul>\n" : "</ol>\n", output);
	free(buffer);
	/* back up over trailing newlines so the caller sees them */
	p--;
	while (*(--p) == '\n');
	return -(int) (p - begin + 1);
}

/* Wrap the text up to the next empty line (or the end of range) in <p>. */
static int
doparagraph(const char *begin, const char *end, int newblock)
{
	const char *p;

	if (!newblock)
		return 0;
	p = strstr(begin, "\n\n");
	if (!p || p > end)
		p = end;
	if (p - begin <= 1)
		return 0;
	fputs("<p>", output);
	process(begin, p, 0);
	fputs("</p>\n", output);
	return -(int) (p - begin);
}

/*
 * Apply the `insert` table (emit text, consume nothing) and then the
 * `replace` table (backslash escapes: emit the replacement, consume the
 * escape sequence).
 */
static int
doreplace(const char *begin, const char *end, int newblock)
{
	unsigned int i;
	size_t l;

	/*
	 * Deliberately not bounded by `end`: a paragraph range stops before
	 * its terminating newline, which the pattern has to see.
	 */
	for (i = 0; i < LENGTH(insert); i++)
		if (strncmp(insert[i][0], begin, strlen(insert[i][0])) == 0)
			fputs(insert[i][1], output);
	for (i = 0; i < LENGTH(replace); i++) {
		l = strlen(replace[i][0]);
		if ((size_t) (end - begin) < l)
			continue;
		if (strncmp(replace[i][0], begin, l) == 0) {
			fputs(replace[i][1], output);
			return (int) l;
		}
	}
	return 0;
}

/*
 * Parse automatic links: <scheme:...> / <...#...> become plain links,
 * <user@host> becomes a mailto: link whose characters are written as
 * numeric character references.
 */
static int
doshortlink(const char *begin, const char *end, int newblock)
{
	const char *p, *c;
	int ismail = 0;	/* 0: undecided, 1: mail address, -1: URL */

	if (*begin != '<')
		return 0;
	for (p = begin + 1; p != end; p++) {
		switch (*p) {
		case ' ':
		case '\t':
		case '\n':
			return 0;
		case '#':
		case ':':
			ismail = -1;
			break;
		case '@':
			if (ismail == 0)
				ismail = 1;
			break;
		case '>':
			if (ismail == 0)
				return 0;
			fputs("<a href=\"", output);
			if (ismail == 1) {
				/* mailto: */
				fputs("mailto:", output);
				/* go through unsigned char so bytes >= 0x80 stay in 0..255 */
				for (c = begin + 1; c != p; c++)
					fprintf(output, "&#%u;", (unsigned int) (unsigned char) *c);
				fputs("\">", output);
				for (c = begin + 1; c != p; c++)
					fprintf(output, "&#%u;", (unsigned int) (unsigned char) *c);
			} else {
				hprint(begin + 1, p);
				fputs("\">", output);
				hprint(begin + 1, p);
			}
			fputs("</a>", output);
			return (int) (p - begin + 1);
		default:
			break;
		}
	}
	return 0;
}

/*
 * Parse spans enclosed by identical markers (code, emphasis, strong).
 * A closing marker preceded by a backslash is skipped.
 */
static int
dosurround(const char *begin, const char *end, int newblock)
{
	unsigned int i;
	size_t l;
	const char *p, *start, *stop;

	for (i = 0; i < LENGTH(surround); i++) {
		l = strlen(surround[i].search);
		if ((size_t) (end - begin) < 2 * l || strncmp(begin, surround[i].search, l) != 0)
			continue;
		start = begin + l;
		p = start - 1;
		do {
			stop = p;
			p = strstr(p + 1, surround[i].search);
		} while (p && p[-1] == '\\');
		if (p && p[-1] != '\\')
			stop = p;
		if (!stop || stop < start || stop >= end)
			continue;
		fputs(surround[i].before, output);

		/*
		 * Single space at start and end are ignored. The span needs at
		 * least two bytes, otherwise one space would be stripped twice
		 * and start would move past stop.
		 */
		if (stop - start >= 2 && *start == ' ' && *(stop - 1) == ' ') {
			start++;
			stop--;
			l++;
		}
		if (surround[i].process)
			process(start, stop, 0);
		else
			hprint(start, stop);
		fputs(surround[i].after, output);
		return (int) (stop - begin) + (int) l;
	}
	return 0;
}

/*
 * Parse Setext headings: a line followed by a line of '=' (h1) or '-' (h2)
 * at least as long as the heading text.
 */
static int
dounderline(const char *begin, const char *end, int newblock)
{
	unsigned int i, j, l;
	const char *p;

	if (!newblock)
		return 0;
	p = begin;
	for (l = 0; p + l != end && p[l] != '\n'; l++);
	/* no text, or no following line that could hold the underline */
	if (l == 0 || p + l == end)
		return 0;
	p += l + 1;
	for (i = 0; i < LENGTH(underline); i++) {
		for (j = 0; p + j != end && p[j] != '\n' && p[j] == underline[i].search[0]; j++);
		if (j >= l) {
			fputs(underline[i].before, output);
			if (underline[i].process)
				process(begin, begin + l, 0);
			else
				hprint(begin, begin + l);
			fputs(underline[i].after, output);
			return -(int) (j + p - begin);
		}
	}
	return 0;
}

/*
 * Allocate (zero-filled, when p is NULL) or resize a buffer to `size` bytes.
 * Memory added by a resize is not zeroed. Exits via eprint() on failure.
 */
static void *
ereallocz(void *p, size_t size)
{
	void *res;

	if (p)
		res = realloc(p, size);
	else
		res = calloc(1, size);

	if (!res)
		eprint("fatal: could not malloc() %zu bytes\n", size);
	return res;
}

/* Write [begin, end) to output with &, ", > and < replaced by entities. */
static void
hprint(const char *begin, const char *end)
{
	const char *p;

	/* `<` rather than `!=`: an inverted range prints nothing */
	for (p = begin; p < end; p++) {
		if (*p == '&')
			fputs("&amp;", output);
		else if (*p == '"')
			fputs("&quot;", output);
		else if (*p == '>')
			fputs("&gt;", output);
		else if (*p == '<')
			fputs("&lt;", output);
		else
			fputc(*p, output);
	}
}

/*
 * Convert the range [begin, end). At every position the parsers are tried
 * in order until one reports a match; a byte nobody claims is copied to
 * output (escaped when nohtml is set). `newblock` tells the parsers whether
 * the position starts a new block.
 */
static void
process(const char *begin, const char *end, int newblock)
{
	const char *p, *q;
	int affected;
	unsigned int i;

	for (p = begin; p < end;) {
		if (newblock)
			while (*p == '\n')
				if (++p == end)
					return;
		affected = 0;
		for (i = 0; i < LENGTH(parsers) && !affected; i++)
			affected = parsers[i] (p, end, newblock);
		/* a parser consumed everything that was left (or more): done */
		if (abs(affected) >= end - p)
			return;
		p += abs(affected);
		if (!affected) {
			if (nohtml)
				hprint(p, p + 1);
			else
				fputc(*p, output);
			p++;
		}
		/* only newlines left: nothing more to emit */
		for (q = p; q < end && *q == '\n'; q++);
		if (q >= end)
			return;
		else if (p[0] == '\n' && p + 1 != end && p[1] == '\n')
			newblock = 1;
		else
			newblock = affected < 0;
	}
}

/*
 * Read all of `source`, convert it from Markdown to HTML and write the
 * result to `out`.
 *
 * out          stream receiving the HTML (also stored in the global `output`)
 * source       stream providing the Markdown text, read until EOF
 * supresshtml  non-zero: raw HTML in the input is escaped, not passed through
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when reading `source` failed.
 * Allocation failures terminate the program through eprint().
 */
int
smu_convert(FILE * out, FILE * source, int supresshtml)
{
	char *buffer = NULL;
	size_t s, len, bsize;

	output = out;
	nohtml = supresshtml;
	bsize = 2 * BUFSIZ;
	buffer = ereallocz(buffer, bsize);
	len = 0;
	/* invariant: bsize >= len + BUFSIZ + 1, room for one read plus the NUL */
	while ((s = fread(buffer + len, 1, BUFSIZ, source))) {
		len += s;
		if (BUFSIZ + len + 1 > bsize) {
			bsize += BUFSIZ;
			buffer = ereallocz(buffer, bsize);
		}
	}
	if (ferror(source)) {
		free(buffer);
		return EXIT_FAILURE;
	}
	buffer[len] = '\0';
	process(buffer, buffer + len, 1);
	free(buffer);
	return EXIT_SUCCESS;
}