/[csv2latex]/csv2latex.c
ViewVC logotype

Contents of /csv2latex.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 49 - (show annotations)
Mon Nov 28 20:57:57 2016 UTC (12 months, 2 weeks ago) by (unknown author)
File MIME type: text/plain
File size: 17922 byte(s)
fix spelling errors
1 /*
2 * csv2latex.c, copyright © 2002- Benoît Rouits <brouits@free.fr>
3 *
4 *********************************************************
5 * csv2latex translates a .csv file to a LaTeX document. *
6 *********************************************************
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; version 2 only
11 * of the License.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 *
23 * see the COPYING file included in the csv2latex package or
24 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt
25 *
26 */
27
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <libgen.h>
32 #include <getopt.h>
33 #include <unistd.h>
34 #include <ctype.h>
35 #include "version.h"
36
/* Set of characters that get a TeX escape when written to the output. */
typedef struct {
    char *tab;  /* the characters to escape, as a C string */
    int size;   /* number of characters in tab */
} texcape;

/* Everything the converter knows about the CSV input and the LaTeX output. */
typedef struct {
    char block;             /* CSV block delimiter, 0 when there is none */
    char sep;               /* CSV field separator */
    unsigned int cols;      /* CSV: number of columns */
    unsigned int chars;     /* CSV: maximum data length */
    unsigned int rows;      /* CSV: total number of lines */
    char pos;               /* LaTeX: alignment letter used for every cell */
    unsigned int lines;     /* LaTeX: rows per tabular */
    unsigned int guess;     /* non-zero: guess the CSV format */
    unsigned int header;    /* non-zero: emit the LaTeX document header */
    unsigned int red;       /* table reduction level (1 to 4), 0 for none */
    unsigned int longtable; /* non-zero: use package longtable */
    unsigned int escape;    /* non-zero: escape TeX control characters */
    unsigned int repeat;    /* non-zero: repeat the table header in each LaTeX table section */
    unsigned int vlines;    /* non-zero: vertical lines between columns */
    unsigned int hlines;    /* non-zero: horizontal lines between rows */
    char *clrrow;           /* row gray level (from 0 to 1) kept as text, NULL for none */
    texcape *tex;           /* TeX escapes */
} config;
61
/* Largest unsigned int value (all bits set). */
#define MAXUINT (~0u)
63
/*
 * Prints the usage summary on standard output.
 *
 * prog: path the program was started with; only its base name is shown.
 */
void rtfm(char* prog) {
    /* help lines that do not mention the program name */
    static const char* const help[] = {
        " [--longtable] (LaTeX) use package longtable: useful for long input\n",
        " [--noescape] (LaTeX) do not escape text: useful for mixed CSV/TeX input\n",
        " [--guess] (CSV) guess separator and block |\n"
        " [--separator <(c)omma|(s)emicolon|(t)ab|s(p)ace|co(l)on>] (CSV's comma)\n"
        " [--block <(q)uote|(d)ouble|(n)one>] (CSV) block delimiter (e.g: none)\n",
        " [--lines n] (LaTeX) rows per table: useful for long tabulars\n",
        " [--position <l|c|r>] (LaTeX) text align in cells\n",
        " [--colorrows graylevel] (LaTeX) alternate gray rows (e.g: 0.75)\n",
        " [--reduce level] (LaTeX) reduce table size (e.g: 1)\n",
        " [--repeatheader] (LaTeX) repeat table header (for long tables)\n",
        " [--nohlines] (LaTeX) don't put hline between table rows\n",
        " [--novlines] (LaTeX) don't put vline between columns\n",
        " csv_file.csv\n",
        "The \"longtable\" option needs the {longtable} LaTeX package\n",
        "The \"colorrows\" option needs the {colortbl} LaTeX package\n",
        "The \"reduce\" option needs the {relsize} LaTeX package\n"
    };
    char* self = basename(prog);
    size_t i;

    printf("%s translates a csv file to a LaTeX file\n", self);
    printf("Example: %s january_stats.csv > january_stats.tex\n", self);
    printf("Usage: %s [--nohead] (LaTeX) no document header: useful for inclusion\n", self);
    for (i = 0; i < sizeof help / sizeof help[0]; i++) {
        fputs(help[i], stdout);
    }
}
86
87 config* parseOptions (config* conf, int argc, char **argv) {
88 /* thx to <vfebvre@lautre.net> */
89 int opt;
90 int tmp;
91
92 #if defined USE_GETOPT
93 #else
94 int longopt_index = 0;
95 static struct option long_options[] = {
96 {"help", 0, NULL, 'h'},
97 {"guess", 0, NULL, 'g'},
98 {"block", 1, NULL, 'b'},
99 {"lines", 1, NULL, 'l'},
100 {"noescape", 0, NULL, 'x'},
101 {"nohead", 0, NULL, 'n'},
102 {"version", 0, NULL, 'v'},
103 {"position", 1, NULL, 'p'},
104 {"separator", 1, NULL, 's'},
105 {"colorrows", 1, NULL, 'c'},
106 {"reduce", 1, NULL, 'r'},
107 {"longtable", 0, NULL, 't'},
108 {"repeatheader", 0, NULL, 'e'},
109 {"novlines", 0, NULL, 'y'},
110 {"nohlines", 0, NULL, 'z'},
111 {NULL, 0, NULL, 0} /* marks end-of-list */
112 };
113 #endif
114 #if defined USE_GETOPT
115 while ((opt = getopt (argc, argv, "hvgnxteyz?b:l:p:s:c:r:")) != EOF) {
116 #else
117 while ((opt = getopt_long (argc, argv, "hvgnxteyz?b:l:p:s:c:r:", long_options, &longopt_index)) > 0) {
118 #endif
119 switch (opt) {
120 case '?':
121 case 'h':
122 rtfm (argv[0]);
123 exit (EXIT_SUCCESS);
124 break;
125 case 'g': /* guess the CSV */
126 conf->guess = 1;
127 break;
128 case 't': /* use package longtable */ /* thx to <Christof.Bodner@infineon.com> */
129 conf->longtable = 1;
130 break;
131 case 'b': /* csv block delimiter */
132 if (optarg[0] == 'q')
133 conf->block = '\'';
134 else if (optarg[0] == 'd')
135 conf->block = '"';
136 else if (optarg[0] == 'n')
137 conf->block = 0; /* no block delimiter */
138 break;
139 case 'l': /* number of lines per TeX tabulars */
140 if (isdigit(optarg[0])) {
141 conf->lines = atoi(optarg);
142 } else {
143 fprintf(stderr,
144 "option \"lines\" need a positive integer value\n");
145 exit(EXIT_FAILURE);
146 }
147 break;
148 case 'n':
149 conf->header = 0;
150 break;
151 case 'x':
152 conf->escape = 0;
153 break;
154 case 'v': /* version */
155 printf ("%s © 2002- Benoît Rouits <brouits@free.fr>\n"
156 "\tVersion %s (%s)\n", PACKAGE, VERSION, RELEASE_DATE);
157 exit (EXIT_SUCCESS);
158 break;
159 case 'p': /* LaTeX position in cell */
160 conf->pos = optarg[0]; /* position char in cell */
161 break;
162 case 's': /* csv block separator */
163 if (optarg[0] == 'c')
164 conf->sep = ',';
165 else if (optarg[0] == 's')
166 conf->sep = ';';
167 else if (optarg[0] == 't')
168 conf->sep = '\t';
169 else if (optarg[0] == 'p')
170 conf->sep = ' ';
171 else if (optarg[0] == 'l')
172 conf->sep = ':';
173 break;
174 case 'c': /* color rows (thx to <jcorso@cse.Buffalo.EDU>) */
175 if (isdigit(optarg[0])) {
176 conf->clrrow = (char*)malloc(strlen(optarg)+1);
177 strcpy(conf->clrrow, optarg);
178 } else {
179 fprintf(stderr,
180 "option \"colorrows\" needs a real value between 0 and 1\n");
181 exit(EXIT_FAILURE);
182 }
183 break;
184 case 'r': /* reduce table size (original idea thx to <boaz.gezer@gmail.com>) */
185 if (isdigit(optarg[0])) {
186 tmp = atoi(optarg);
187 conf->red = (tmp>4) ? 4 : (tmp<0) ? 0 : tmp; /* [1-4] */
188 } else {
189 fprintf(stderr,
190 "option \"reduce\" needs an integer value between 1 and 4\n");
191 exit(EXIT_FAILURE);
192 }
193 break;
194 case 'e': /*repeat table header for each table section*/
195 conf->repeat = 1;
196 break;
197 case 'y': /*don't draw vlines between columns*/
198 conf->vlines = 0;
199 break;
200 case 'z': /*don't draw hlines between rows*/
201 conf->hlines = 0;
202 break;
203 }
204 }
205 return conf;
206 }
207 int guessCSV(config* conf, FILE* in) {
208 /* guess the block delimiter and the csv separator */
209 int token;
210
211 token = getc(in); /* first char is block delimiter */
212 if (token == EOF) {
213 fprintf(stderr, "ERROR: empty file ?\n");
214 return -1;
215 } else if (ispunct(token) || token == ' ') {
216 /* found first block delimiter, act this way */
217 conf->block = token;
218 fprintf(stderr, "Guessed '%c' as Block Delimiter\n",
219 conf->block);
220 /* stream file while token is printable data */
221 while ((token = getc(in)) != conf->block &&
222 token != '\n' &&
223 token != EOF)
224 {/* getc has been done */}
225 if (token == conf->block) {
226 /* second delimiter : next is separator */
227 conf->sep = getc(in);
228 fprintf(stderr, "Guessed '%c' as Separator\n",
229 conf->sep);
230 return 0;
231 } else {
232 return -1; /* what else ? */
233 }
234 } else { /* no block delimiter, act this way */
235 conf->block = 0;
236 fprintf(stderr, "Guessed No Block Delimiter\n");
237 /* stream file while input is not a control char */
238 while (!ispunct((token = getc(in))) &&
239 token != '\n' &&
240 token != EOF)
241 {/* getc has been done */}
242 /* guess CSV separator */
243 if (ispunct(token) || token == '\t' || token == ' ') {
244 conf->sep = token;
245 fprintf(stderr, "Guessed %c as Separator\n", conf->sep);
246 return 0;
247 } else { /* did not found any separator */
248 fprintf(stderr, "ERROR: Did not guess any Separator!\n");
249 return -1;
250 }
251 }
252 return 0;
253 }
254
255 void getMaximums(config* conf, FILE* in) {
256 /* gets the number of cols and chars of a csv file assuming a separator */
257 int token = 0;
258 int nosep = 0;
259 unsigned int curcol = 0;
260 unsigned int curchar = 0;
261 unsigned int inblock = 0;
262 /* init */
263 conf->chars = 0;
264 conf->cols = 0;
265 conf->rows = 0;
266
267 while (token != EOF) {
268 token = getc(in);
269
270 /* EOF ? */
271 if (token == EOF) {
272 continue;
273 }
274
275 /* decide the maximums */
276 if (token == '\n') {
277 curcol++;
278 conf->cols = (conf->cols<curcol) ? curcol : conf->cols;
279 conf->chars = (conf->chars<curchar) ? curchar : conf->chars;
280 conf->rows++;
281 curcol = 0;
282 curchar = 0;
283 inblock = 0; /* reset block state */
284 continue;
285 }
286
287 /* check implicit, non-guessed, block */
288 if (token == '\"') {
289 if (nosep == 0)
290 nosep = 1;
291 else
292 nosep = 0;
293 }
294
295 /* enter/quit a block */
296 if (conf->block && token == conf->block) {
297 inblock = !inblock;
298 continue;
299 }
300
301 /* count cols in current line */
302 if (token == conf->sep && ((conf->block && !inblock) || !conf->block) && nosep == 0) {
303 curcol++;
304 continue;
305 }
306
307 /* count chars in current cell */
308 if (token != conf->block && ((conf->block && inblock) || !conf->block)) {
309 curchar++;
310 continue;
311 }
312 }
313 return;
314 }
315
316 void doTeXsub(config* conf, char newsep, FILE* in, FILE* out) {
317 /* substitutes CSV sep by LaTeX newsep and some TeX code */
318 int token = 0;
319 int max;
320 int numcols;
321 unsigned int lines;
322 int inblock = 0;
323 int csvrows;
324 int firstrow = 1;
325 int nosep = 0;
326 int token1 = 0;
327 int token2 = 0;
328 char headerrow[1000];
329 headerrow[0] = '\0';
330
331 max = numcols = conf->cols;
332 csvrows = conf->rows;
333 /* choose infinity when conf->lines is 0 */
334 lines = (conf->lines) ? conf->lines : MAXUINT;
335
336 while (token != EOF) {
337 token2 = token1; /* second last character, used for detection of quotation marks */
338 token1 = token; /* last character, used for detection of quotation marks */
339 token = getc(in);
340
341 /* EOF ? */
342 if (token == EOF) {
343 continue;
344 }
345
346 /* new line ? */
347 if (token == '\n') {
348 inblock = 0; /* close block if any */
349 /* fill empty cols if any */
350 while (numcols > 1) {
351 putc(newsep, out);
352 numcols--;
353 }
354 if (!(firstrow && (conf->longtable && conf->repeat))) {
355 fprintf(out, "\\\\\n"); /* TeX new line */
356 } else { /* first row and repeat and longtable */
357 fprintf(out, "%s\\\\\n", headerrow);
358 }
359 if (conf->hlines) {
360 fprintf(out, "\\hline\n"); /* TeX draw hline */
361 }
362 if (firstrow && (conf->longtable && conf->repeat)) {
363 fprintf(out, "%s", "\\endhead\n");
364 }
365 if (firstrow && conf->repeat) {
366 char tmp[12];
367 sprintf(tmp, (conf->hlines ? "\\\\\n\\hline" : "\\\\\n"));
368 strcat(headerrow, tmp);
369 }
370 firstrow = 0;
371 numcols = max; /* reset numcols */
372 lines--;
373 csvrows--;
374 /* put a colored row or not (alternate) */
375 if (conf->clrrow && (lines % 2)) {
376 fprintf(out, "\\colorrow ");
377 }
378 /* if the LaTeX tabular is full create a new one, except if no more row or this is a long table */
379 if (!lines && csvrows && !conf->longtable) {
380 fprintf(out, "\\end{tabular}\n");
381 fprintf(out, "\\newline\n");
382 fprintf(out, "\\begin{tabular}{");
383 if (conf->vlines) {
384 putc('|', out);
385 }
386 while (numcols--) {
387 putc(conf->pos, out);
388 if (conf->vlines) {
389 putc('|', out);
390 }
391 }
392 fprintf(out, "}\n");
393 if (conf->hlines) {
394 fprintf(out, "\\hline\n");
395 }
396 if (conf->repeat && !conf->longtable) {
397 fprintf(out, "%s", headerrow);
398 putc('\n', out);
399 }
400 numcols = max;
401 lines = (conf->lines) ? conf->lines : MAXUINT;
402 }
403 /* else end of CSV data */
404 continue;
405 }
406
407 /* if commas in cells */
408 /* thx to <florian@heinze.at> */
409 if (token == '\"') {
410 if (nosep == 0)
411 nosep = 1;
412 else
413 nosep = 0;
414 }
415
416 /* new column ? */
417 if ((token == conf->sep && ((conf->block && !inblock) || !conf->block)) && nosep == 0) {
418 if (!(firstrow && (conf->longtable && conf->repeat)))
419 putc(newsep, out);
420 numcols--;
421 if (firstrow && conf->repeat)
422 {
423 char tmp[2];
424 tmp[0] = newsep;
425 tmp[1] = '\0';
426 strcat(headerrow, tmp);
427 }
428 continue;
429 }
430
431 /* enter/quit a block ? */
432 if (conf->block && token == conf->block) {
433 inblock = !inblock;
434 continue;
435 }
436
437 /* data ? */
438 if ((token != conf->block && ((conf->block && inblock) || !conf->block))
439 && ((token == '\"' && token1 == '\"' && token2 == '\"') || token != '\"')) {
440 /* look for special TeX char to escape */
441 /* FIXME: put all that into a subroutine */
442 int i = 0;
443 if (conf->escape)
444 for (i = 0; i < conf->tex->size; i++) {
445 if (token == conf->tex->tab[i]) {
446 switch (token) {
447 case '\\':
448 fprintf(out, "\\textbackslash{}");
449 if (firstrow && conf->repeat)
450 {
451 char tmp[17];
452 sprintf(tmp, "\\textbackslash{}");
453 strcat(headerrow, tmp);
454 }
455 break;
456 default:
457 fprintf(out, "\\%c", token);
458 if (firstrow && conf->repeat)
459 {
460 char tmp[3];
461 tmp[0] = '\\';
462 tmp[1] = token;
463 tmp[2] = '\0';
464 strcat(headerrow, tmp);
465 }
466 break;
467 }
468 break; /* there was some escaping */
469 }
470 }
471 /* or print raw char */
472 if ((i >= conf->tex->size) || (!conf->escape)) {
473 if (!(firstrow && (conf->longtable && conf->repeat)))
474 putc(token, out); /* do not print the header twice */
475 if (firstrow && conf->repeat)
476 {
477 char tmp[2];
478 tmp[0] = token;
479 tmp[1] = '\0';
480 strcat(headerrow, tmp);
481 }
482 }
483 continue;
484 }
485 /* do nothing if unexpected char: just loop */
486 }
487 return;
488 }
489
490 void doTeXdoc(config* conf, FILE* in, FILE* out) {
491 /* prepares the LaTeX tabular layout */
492 int maxcols;
493 int numcols;
494 char* relsize[5] = {"0", "0.5", "1", "2", "4"}; /* LaTeX relsize good values */
495 char* tabcolsep[5] = {"0", "0.05", "0.1", "0.2", "0.4"}; /* LaTeX tabcolsep good values */
496
497 numcols = maxcols = conf->cols;
498 if (conf->header) {
499 fprintf(out, "\\documentclass[a4paper]{article}\n");
500 fprintf(out, "\\usepackage[T1]{fontenc}\n");
501 fprintf(out, "\\usepackage[utf8]{inputenc}\n");
502 if (conf->red) {
503 fprintf(out, "\\usepackage{relsize}\n");
504 }
505 if (conf->clrrow) {
506 fprintf(out, "\\usepackage{colortbl}\n");
507 }
508 if (conf->longtable) {
509 fprintf(out, "\\usepackage{longtable}\n");
510 }
511 fprintf(out, "\\begin{document}\n");
512 }
513 if (conf->clrrow) {
514 fprintf(out, "\\def\\colorrow{\\rowcolor[gray]{%s}}\n",
515 conf->clrrow);
516 }
517 if (conf->red) {
518 fprintf(out, "\\relsize{-%s}\n", relsize[conf->red]);
519 fprintf(out, "\\addtolength\\tabcolsep{-%sem}\n", tabcolsep[conf->red]);
520 }
521 if (conf->longtable)
522 {
523 fprintf(out, "\\begin{longtable}{");
524 if (conf->vlines)
525 putc('|', out);
526 }
527 else
528 {
529 fprintf(out, "\\begin{tabular}{");
530 if (conf->vlines)
531 putc('|', out);
532 }
533 while (numcols--)
534 {
535 fprintf(out, "%c", conf->pos); /* position in cell */
536 if (conf->vlines)
537 putc('|', out);
538 }
539 fprintf(out, "}\n");
540 if (conf->hlines)
541 fprintf(out, "\\hline\n");
542 doTeXsub(conf, '&', in, out); /* & is LaTeX separator */
543 if (conf->longtable) {
544 fprintf(out, "\\end{longtable}\n");
545 } else {
546 fprintf(out, "\\end{tabular}\n");
547 }
548 if (conf->red) {
549 fprintf(out, "\\addtolength\\tabcolsep{+%sem}\n", tabcolsep[conf->red]);
550 fprintf(out, "\\relsize{+%s}\n", relsize[conf->red]);
551 }
552 if (conf->header) {
553 fprintf(out, "\\end{document}\n");
554 }
555 return;
556 }
557
558 int main (int argc, char **argv) {
559 FILE* fp;
560 config* conf;
561
562 extern int optind, opterr, optopt;
563
564 conf = (config*)malloc(sizeof(config));
565 /* defaults (ensure init): */
566 conf->cols = 1; /* CSV: if getMaximums fails */
567 conf->rows = 0; /* CSV: must be 0 */
568 conf->chars = 0; /* CSV: must be 0 */
569 conf->pos = 'l'; /* usual; LaTeX */
570 conf->lines = 40; /* usual; LaTeX */
571 conf->guess = 0; /* usual */
572 conf->sep = ','; /* default; csv */
573 conf->block = 0; /* default; csv */
574 conf->header = 1; /* usual; LaTeX */
575 conf->escape = 1; /* usual; LaTeX */
576 conf->clrrow = NULL; /* default; LaTeX */
577 conf->red = 0; /* default; LaTeX */
578 conf->longtable = 0; /* default; without package longtable */
579 conf->repeat = 0; /* default; do not repeat the header row */
580 conf->vlines = 1; /* default; draw lines between columns */
581 conf->hlines = 1; /* default; draw lines between rows */
582
583 /* TeX charaters to escape */
584 conf->tex = (texcape*)malloc(sizeof(texcape));
585 conf->tex->tab = "\\_#$%^&{}~";
586 conf->tex->size = strlen(conf->tex->tab);
587
588
589 conf = parseOptions(conf, argc, argv);
590
591 if (optind == argc) {
592 /* copy stdin into tmp file */
593 int c;
594 fp = tmpfile();
595 while (EOF != (c = getc(stdin))) {
596 putc(c, fp);
597 }
598 rewind(fp);
599 } else {
600 fp = fopen(argv[optind], "r");
601 }
602
603 if (!fp) {
604 fprintf(stderr, "Can't open file %s\n", argv[optind]);
605 exit(EXIT_FAILURE);
606 }
607
608 if (conf->guess) {
609 if (guessCSV(conf, fp)) {
610 fprintf(stderr, "Please run again by using --delimiter (if any) and --separator\n");
611 fclose(fp);
612 exit(EXIT_FAILURE);
613 }
614 rewind(fp);
615 }
616
617 getMaximums(conf, fp);
618 rewind(fp);
619
620 doTeXdoc(conf, fp, stdout);
621
622 free(conf->tex);
623 if (conf->clrrow)
624 free(conf->clrrow);
625 free(conf);
626 fclose(fp);
627
628 exit(EXIT_SUCCESS);
629 }

  ViewVC Help
Powered by ViewVC 1.1.26