/[csv2latex]/csv2latex.c
ViewVC logotype

Annotation of /csv2latex.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 33 - (hide annotations)
Wed Oct 19 17:53:07 2016 UTC (3 years, 1 month ago) by (unknown author)
File MIME type: text/plain
File size: 17734 byte(s)
handle input from stdin
1 ben 1 /*
2     * csv2latex.c, copyright © 2002- Benoît Rouits <brouits@free.fr>
3     *
4     *********************************************************
5     * csv2latex translates a .csv file to a LaTex document. *
6     *********************************************************
7     *
8     * This program is free software; you can redistribute it and/or
9     * modify it under the terms of the GNU General Public License
10     * as published by the Free Software Foundation; version 2 only
11     * of the License.
12     *
13     * This program is distributed in the hope that it will be useful,
14     * but WITHOUT ANY WARRANTY; without even the implied warranty of
15     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16     * GNU General Public License for more details.
17     *
18     * You should have received a copy of the GNU General Public License
19     * along with this program; if not, write to the Free Software
20     * Foundation, Inc., 51 Franklin Street, Fifth Floor,
21     * Boston, MA 02110-1301, USA.
22     *
23     * see the COPYING file included in the csv2latex package or
24     * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt
25     *
26     */
27    
28     #include <stdlib.h>
29     #include <stdio.h>
30     #include <string.h>
31     #include <libgen.h>
32     #include <getopt.h>
33     #include <unistd.h>
34     #include <ctype.h>
35     #include "version.h"
36    
/*
 * Set of characters that need a TeX escape.
 * The table is only ever read (it is pointed at a string literal),
 * hence the const qualifier: writing through it would be undefined.
 */
typedef struct {
    const char* tab; /* actual escapes: one character per entry */
    int size;        /* escape tab len (number of characters in tab) */
} texcape;
41    
42     typedef struct {
43     char block; /* CSV delimitor if any */
44     char sep; /* CSV separator */
45     unsigned int cols; /* CSV columns */
46     unsigned int chars; /* CSV max data length */
47     unsigned int rows; /* CSV total number of lines */
48     char pos; /* position in cell (align) */
49     unsigned int lines; /* rows per LaTeX tabular */
50     unsigned int guess; /* guess or not the CSV format */
51     unsigned int header; /* put LaTeX document header or not */
52     unsigned int red; /* table reduction level (from 1 to 4)*/
53     unsigned int longtable; /* use package longtable */
54     unsigned int escape; /* escape TeX control chars or not */
55 ben 6 unsigned int repeat; /* repeat table headers for each LaTeX table section or not */
56     unsigned int vlines; /* insert vertical lines between columns or not */
57     unsigned int hlines; /* insert horizontal lines between rows or not */
58 ben 1 char* clrrow; /* row graylevel (from 0 to 1) */
59     texcape* tex; /* TeX escapes */
60     } config;
61    
/* largest unsigned int value: every bit set */
#define MAXUINT (~0U)
63    
/*
 * Print the usage text on standard output.
 * prog: the program path as invoked (argv[0]); only its last path
 *       component is shown.
 */
void rtfm(char* prog) {
    const char* name = basename(prog);

    /* lines that mention the program name */
    printf("%s translates a csv file to a LaTeX file\n", name);
    printf("Example: %s january_stats.csv > january_stats.tex\n", name);
    printf("Usage: %s [--nohead] (LaTeX) no document header: useful for inclusion\n", name);

    /* option summary */
    fputs(" [--longtable] (LaTeX) use package longtable: useful for long input\n", stdout);
    fputs(" [--noescape] (LaTeX) do not escape text: useful for mixed CSV/TeX input\n", stdout);
    fputs(" [--guess] (CSV) guess separator and block |\n"
          " [--separator <(c)omma|(s)emicolon|(t)ab|s(p)ace|co(l)on>] (CSV's comma)\n"
          " [--block <(q)uote|(d)ouble|(n)one>] (CSV) block delimiter (e.g: none)\n", stdout);
    fputs(" [--lines n] (LaTeX) rows per table: useful for long tabulars\n", stdout);
    fputs(" [--position <l|c|r>] (LaTeX) text align in cells\n", stdout);
    fputs(" [--colorrows graylevel] (LaTeX) alternate gray rows (e.g: 0.75)\n", stdout);
    fputs(" [--reduce level] (LaTeX) reduce table size (e.g: 1)\n", stdout);
    fputs(" [--repeatheader] (LaTeX) repeat table header (for long tables)\n", stdout);
    fputs(" [--nohlines] (LaTeX) don't put hline between table rows\n", stdout);
    fputs(" [--novlines] (LaTeX) don't put vline between columns\n", stdout);
    fputs(" csv_file.csv\n", stdout);

    /* LaTeX package requirements */
    fputs("The \"longtable\" option needs the {longtable} LaTeX package\n", stdout);
    fputs("The \"colorrows\" option needs the {colortbl} LaTeX package\n", stdout);
    fputs("The \"reduce\" option needs the {relsize} LaTeX package\n", stdout);
}
86    
87     config* parseOptions (config* conf, int argc, char **argv) {
88     /* thx to <vfebvre@lautre.net> */
89     int opt;
90     int tmp;
91    
92     #if defined USE_GETOPT
93     #else
94     int longopt_index = 0;
95     static struct option long_options[] = {
96 33 {"help", 0, NULL, 'h'},
97     {"guess", 0, NULL, 'g'},
98     {"block", 1, NULL, 'b'},
99     {"lines", 1, NULL, 'l'},
100     {"noescape", 0, NULL, 'x'},
101     {"nohead", 0, NULL, 'n'},
102     {"version", 0, NULL, 'v'},
103     {"position", 1, NULL, 'p'},
104     {"separator", 1, NULL, 's'},
105     {"colorrows", 1, NULL, 'c'},
106     {"reduce", 1, NULL, 'r'},
107     {"longtable", 0, NULL, 't'},
108     {"repeatheader", 0, NULL, 'e'},
109     {"novlines", 0, NULL, 'y'},
110     {"nohlines", 0, NULL, 'z'},
111     {NULL, 0, NULL, 0} /* marks end-of-list */
112 ben 1 };
113     #endif
114     #if defined USE_GETOPT
115 ben 6 while ((opt = getopt (argc, argv, "hvgnxteyz?b:l:p:s:c:r:")) != EOF) {
116 ben 1 #else
117 ben 6 while ((opt = getopt_long (argc, argv, "hvgnxteyz?b:l:p:s:c:r:", long_options, &longopt_index)) > 0) {
118 ben 1 #endif
119     switch (opt) {
120     case '?':
121     case 'h':
122     rtfm (argv[0]);
123     exit (EXIT_SUCCESS);
124     break;
125     case 'g': /* guess the CSV */
126 33 conf->guess = 1;
127 ben 1 break;
128 ben 13 case 't': /* use package longtable */ /* thx to <Christof.Bodner@infineon.com> */
129 33 conf->longtable = 1;
130 ben 1 break;
131     case 'b': /* csv block delimiter */
132     if(optarg[0] == 'q')
133     conf->block = '\'';
134     else if(optarg[0] == 'd')
135     conf->block = '"';
136     else if(optarg[0] == 'n')
137     conf->block = 0; /* no block delimiter */
138     break;
139     case 'l': /* number of lines per TeX tabulars */
140     if(isdigit(optarg[0])) {
141 33 conf->lines = atoi(optarg);
142 ben 1 } else {
143     fprintf(stderr,
144     "option \"lines\" need a positive integer value\n");
145     exit(EXIT_FAILURE);
146     }
147     break;
148     case 'n':
149 33 conf->header = 0;
150 ben 1 break;
151     case 'x':
152 33 conf->escape = 0;
153 ben 1 break;
154     case 'v': /* version */
155     printf ("%s © 2002- Benoît Rouits <brouits@free.fr>\n"
156     "\tVersion %s (%s)\n", PACKAGE, VERSION, RELEASE_DATE);
157     exit (EXIT_SUCCESS);
158     break;
159     case 'p': /* LaTeX position in cell */
160 33 conf->pos = optarg[0]; /* position char in cell */
161 ben 1 break;
162     case 's': /* csv block separator */
163     if(optarg[0] == 'c')
164     conf->sep = ',';
165     else if(optarg[0] == 's')
166     conf->sep = ';';
167     else if(optarg[0] == 't')
168     conf->sep = '\t';
169     else if(optarg[0] == 'p')
170     conf->sep = ' ';
171     else if(optarg[0] == 'l')
172     conf->sep = ':';
173     break;
174     case 'c': /* color rows (thx to <jcorso@cse.Buffalo.EDU>) */
175     if(isdigit(optarg[0])) {
176     conf->clrrow = (char*)malloc(strlen(optarg)+1);
177 33 strcpy(conf->clrrow, optarg);
178 ben 1 } else {
179     fprintf(stderr,
180     "option \"colorrows\" needs a real value between 0 and 1\n");
181     exit(EXIT_FAILURE);
182     }
183     break;
184     case 'r': /* reduce table size (original idea thx to <boaz.gezer@gmail.com>) */
185    
186     if(isdigit(optarg[0])) {
187     tmp = atoi(optarg);
188 33 conf->red = (tmp>4) ? 4 : (tmp<0) ? 0 : tmp; /* [1-4] */
189 ben 1 } else {
190     fprintf(stderr,
191     "option \"reduce\" needs an integer value between 1 and 4\n");
192     exit(EXIT_FAILURE);
193     }
194     break;
195 ben 6 case 'e': /*repeat table header for each table section*/
196 33 conf->repeat = 1;
197 ben 6 break;
198     case 'y': /*don't draw vlines between columns*/
199 33 conf->vlines = 0;
200 ben 6 break;
201     case 'z': /*don't draw hlines between rows*/
202 33 conf->hlines = 0;
203 ben 6 break;
204 ben 1 }
205     }
206     return conf;
207     }
208     int guessCSV(config* conf, FILE* in) {
209     /* guess the block delimiter and the csv separator */
210     int token;
211    
212 33 token = getc(in); /* first char is block delimiter */
213 ben 1 if(token == EOF) {
214 33 fprintf(stderr, "ERROR: emtpy file ?\n");
215 ben 1 return(-1);
216     } else if (ispunct(token) || token == ' ') {
217     /* found first block delimiter, act this way */
218 33 conf->block = token;
219     fprintf(stderr, "Guessed '%c' as Block Delimiter\n",
220 ben 1 conf->block);
221     /* stream file while token is printable data */
222 33 while((token = getc(in)) != conf->block &&
223 ben 1 token != '\n' &&
224     token != EOF)
225     {/* getc has been done */}
226     if(token == conf->block){
227     /* second delimiter : next is separator */
228 33 conf->sep = getc(in);
229     fprintf(stderr, "Guessed '%c' as Separator\n",
230 ben 1 conf->sep);
231     return(0);
232     }else{
233     return (-1); /* what else ? */
234     }
235     }else{ /* no block delimiter, act this way */
236 33 conf->block = 0;
237     fprintf(stderr, "Guessed No Block Delimiter\n");
238 ben 1 /* stream file while input is not a control char */
239 33 while(isalnum((token = getc(in))) &&
240 ben 1 token != '\n' &&
241     token != EOF)
242     {/* getc has been done */}
243     /* guess CSV separator */
244     if(ispunct(token) || token == '\t' || token == ' '){
245 33 conf->sep = token;
246     fprintf(stderr, "Guessed %c as Separator\n", conf->sep);
247 ben 1 return(0);
248     } else { /* did not found any separator */
249 33 fprintf(stderr, "ERROR: Did not guess any Separator!\n");
250 ben 1 return(-1);
251     }
252     }
253     return(0);
254     }
255    
256     void getMaximums(config* conf, FILE* in) {
257     /* gets the number of cols and chars of a csv file assuming a separator */
258 33 int token = 0;
259     unsigned int curcol = 0;
260     unsigned int curchar = 0;
261     unsigned int inblock = 0;
262 ben 1 /* init */
263 33 conf->chars = 0;
264     conf->cols = 0;
265     conf->rows = 0;
266 ben 1
267     while (token != EOF) {
268 33 token = getc(in);
269 ben 1
270     /* EOF ? */
271     if (token == EOF) {
272     continue;
273     }
274    
275     /* decide the maximums */
276     if (token == '\n') {
277     curcol++;
278 33 conf->cols = (conf->cols<curcol) ? curcol : conf->cols;
279     conf->chars = (conf->chars<curchar) ? curchar : conf->chars;
280 ben 1 conf->rows++;
281 33 curcol = 0;
282     curchar = 0;
283     inblock = 0; /* reset block state */
284 ben 1 continue;
285     }
286    
287     /* enter/quit a block */
288     if (conf->block && token == conf->block) {
289 33 inblock = !inblock;
290 ben 1 continue;
291     }
292    
293     /* count cols in current line */
294     if (token == conf->sep && ((conf->block && !inblock) || !conf->block)) {
295     curcol++;
296     continue;
297     }
298    
299     /* count chars in current cell */
300     if (token != conf->block && ((conf->block && inblock) || !conf->block)) {
301     curchar++;
302     continue;
303     }
304     }
305     return;
306     }
307    
308     void doTeXsub(config* conf, char newsep, FILE* in, FILE* out) {
309     /* substitutes CSV sep by LaTeX newsep and some TeX code */
310 33 int token = 0;
311 ben 1 int max;
312     int numcols;
313 ben 6 unsigned int lines;
314 33 int inblock = 0;
315 ben 1 int csvrows;
316 33 int firstrow = 1;
317     int nosep = 0;
318     int token1 = 0;
319     int token2 = 0;
320 ben 6 char headerrow[1000];
321 33 headerrow[0] = '\0';
322 ben 1
323 33 max = numcols = conf->cols;
324     csvrows = conf->rows;
325 ben 1 /* choose infinity when conf->lines is 0 */
326 33 lines = (conf->lines) ? conf->lines : MAXUINT;
327 ben 1
328 33 while (token != EOF) {
329     token2 = token1; /* second last character, used for detection of quotation marks */
330     token1 = token; /* last character, used for detection of quotation marks */
331     token = getc(in);
332 ben 1
333     /* EOF ? */
334     if (token == EOF) {
335     continue;
336     }
337    
338     /* new line ? */
339     if (token == '\n') {
340     inblock = 0; /* close block if any */
341     /* fill empty cols if any */
342     while (numcols > 1) {
343 33 putc(newsep, out);
344 ben 1 numcols--;
345     }
346 ben 6 if (!(firstrow && (conf->longtable && conf->repeat))) {
347 33 fprintf(out, "\\\\\n"); /* TeX new line */
348 ben 6 } else { /* first row and repeat and longtable */
349     fprintf(out, "%s\\\\\n", headerrow);
350     }
351     if(conf->hlines) {
352 33 fprintf(out, "\\hline\n"); /* TeX draw hline */
353 ben 6 }
354     if (firstrow && (conf->longtable && conf->repeat)) {
355     fprintf(out, "%s", "\\endhead\n");
356     }
357     if(firstrow && conf->repeat) {
358     char tmp[12];
359 33 sprintf(tmp, (conf->hlines ? "\\\\\n\\hline" : "\\\\\n"));
360     strcat(headerrow, tmp);
361 ben 6 }
362 33 firstrow = 0;
363     numcols = max; /* reset numcols */
364 ben 1 lines--;
365     csvrows--;
366     /* put a colored row or not (alternate) */
367 ben 6 if (conf->clrrow && (lines % 2)) {
368 33 fprintf(out, "\\colorrow ");
369 ben 1 }
370 ben 6 /* if the LaTeX tabular is full create a new one, except if no more row or this is a long table */
371     if (!lines && csvrows && !conf->longtable) {
372 33 fprintf(out, "\\end{tabular}\n");
373 ben 6 fprintf(out, "\\newline\n");
374 33 fprintf(out, "\\begin{tabular}{");
375 ben 6 if(conf->vlines) {
376 33 putc('|', out);
377 ben 6 }
378     while(numcols--) {
379     putc(conf->pos, out);
380     if(conf->vlines) {
381 33 putc('|', out);
382 ben 6 }
383     }
384 ben 1 fprintf(out, "}\n");
385 ben 6 if(conf->hlines) {
386     fprintf(out, "\\hline\n");
387     }
388     if(conf->repeat && !conf->longtable) {
389 33 fprintf(out, "%s", headerrow);
390     putc('\n', out);
391 ben 6 }
392 33 numcols = max;
393     lines = (conf->lines) ? conf->lines : MAXUINT;
394 ben 6 }
395     /* else end of CSV data */
396 ben 1 continue;
397     }
398    
399 ben 13 /* if commas in cells */
400     /* thx to <florian@heinze.at> */
401     if (token == '\"'){
402 33 if (nosep == 0)
403     nosep = 1;
404 ben 13 else
405 33 nosep = 0;
406 ben 13 }
407    
408 ben 1 /* new column ? */
409 ben 13 if ((token == conf->sep && ((conf->block && !inblock) || !conf->block)) && nosep == 0) {
410 ben 6 if (!(firstrow && (conf->longtable && conf->repeat)))
411 33 putc(newsep, out);
412 ben 1 numcols--;
413 ben 6 if(firstrow && conf->repeat)
414     {
415     char tmp[2];
416 33 tmp[0] = newsep;
417     tmp[1] = '\0';
418     strcat(headerrow, tmp);
419 ben 6 }
420 ben 1 continue;
421     }
422    
423     /* enter/quit a block ? */
424     if (conf->block && token == conf->block) {
425 33 inblock = !inblock;
426 ben 1 continue;
427     }
428    
429     /* data ? */
430 33 if ((token != conf->block && ((conf->block && inblock) || !conf->block))
431     && ((token == '\"' && token1 == '\"' && token2 == '\"') || token != '\"')) {
432 ben 1 /* look for special TeX char to escape */
433     /* FIXME: put all that into a subroutine */
434 33 int i = 0;
435 ben 1 if (conf->escape)
436 33 for (i = 0; i < conf->tex->size; i++) {
437 ben 1 if (token == conf->tex->tab[i]) {
438     switch (token) {
439     case '\\':
440     fprintf(out, "\\textbackslash{}");
441 ben 6 if(firstrow && conf->repeat)
442     {
443     char tmp[17];
444 33 sprintf(tmp, "\\textbackslash{}");
445     strcat(headerrow, tmp);
446 ben 6 }
447 ben 1 break;
448     default:
449     fprintf(out, "\\%c", token);
450 ben 6 if(firstrow && conf->repeat)
451     {
452     char tmp[3];
453 33 tmp[0] = '\\';
454     tmp[1] = token;
455     tmp[2] = '\0';
456     strcat(headerrow, tmp);
457 ben 6 }
458 ben 1 break;
459     }
460     break; /* there was some escaping */
461     }
462     }
463     /* or print raw char */
464 33 if ((i >= conf->tex->size) || (!conf->escape)) {
465 ben 6 if (!(firstrow && (conf->longtable && conf->repeat)))
466     putc(token, out); /* do not print the header twice */
467     if(firstrow && conf->repeat)
468     {
469     char tmp[2];
470 33 tmp[0] = token;
471     tmp[1] = '\0';
472     strcat(headerrow, tmp);
473 ben 6 }
474 ben 1 }
475     continue;
476     }
477     /* do nothing if unexpected char: just loop */
478     }
479     return;
480     }
481    
482     void doTeXdoc(config* conf, FILE* in, FILE* out) {
483     /* prepares the LaTeX tabular layout */
484     int maxcols;
485     int numcols;
486     char* relsize[5] = {"0", "0.5", "1", "2", "4"}; /* LaTeX relsize good values */
487     char* tabcolsep[5] = {"0", "0.05", "0.1", "0.2", "0.4"}; /* LaTeX tabcolsep good values */
488    
489 33 numcols = maxcols = conf->cols;
490 ben 1 if(conf->header){
491     fprintf(out, "\\documentclass[a4paper]{article}\n");
492     fprintf(out, "\\usepackage[T1]{fontenc}\n");
493     fprintf(out, "\\usepackage[latin1]{inputenc}\n");
494     if (conf->red){
495 33 fprintf(out, "\\usepackage{relsize}\n");
496 ben 1 }
497     if (conf->clrrow){
498 33 fprintf(out, "\\usepackage{colortbl}\n");
499 ben 1 }
500     if (conf->longtable){
501 33 fprintf(out, "\\usepackage{longtable}\n");
502 ben 1 }
503     fprintf(out, "\\begin{document}\n");
504     }
505     if (conf->clrrow){
506 33 fprintf(out, "\\def\\colorrow{\\rowcolor[gray]{%s}}\n",
507 ben 1 conf->clrrow);
508     }
509     if (conf->red){
510 33 fprintf(out, "\\relsize{-%s}\n", relsize[conf->red]);
511     fprintf(out, "\\addtolength\\tabcolsep{-%sem}\n", tabcolsep[conf->red]);
512 ben 1 }
513     if (conf->longtable)
514 ben 6 {
515     fprintf(out, "\\begin{longtable}{");
516     if(conf->vlines)
517 33 putc('|', out);
518 ben 6 }
519 ben 1 else
520 ben 6 {
521     fprintf(out, "\\begin{tabular}{");
522     if(conf->vlines)
523 33 putc('|', out);
524 ben 6 }
525 ben 1 while(numcols--)
526 ben 6 {
527 33 fprintf(out, "%c", conf->pos); /* position in cell */
528 ben 6 if(conf->vlines)
529 33 putc('|', out);
530 ben 6 }
531 ben 1 fprintf(out, "}\n");
532 ben 6 if(conf->hlines)
533     fprintf(out, "\\hline\n");
534 ben 1 doTeXsub(conf, '&', in, out); /* & is LaTeX separator */
535     if (conf->longtable) {
536     fprintf(out, "\\end{longtable}\n");
537     } else {
538     fprintf(out, "\\end{tabular}\n");
539     }
540     if (conf->red){
541 33 fprintf(out, "\\addtolength\\tabcolsep{+%sem}\n", tabcolsep[conf->red]);
542     fprintf(out, "\\relsize{+%s}\n", relsize[conf->red]);
543 ben 1 }
544     if(conf->header){
545     fprintf(out, "\\end{document}\n");
546     }
547     return;
548     }
549 33
550 ben 1 int main (int argc, char **argv) {
551     FILE* fp;
552     config* conf;
553    
554     extern int optind, opterr, optopt;
555    
556 33 conf = (config*)malloc(sizeof(config));
557 ben 1 /* defaults (ensure init): */
558 33 conf->cols = 1; /* CSV: if getMaximums fails */
559     conf->rows = 0; /* CSV: must be 0 */
560     conf->chars = 0; /* CSV: must be 0 */
561     conf->pos = 'l'; /* usual; LaTeX */
562     conf->lines = 40; /* usual; LaTeX */
563     conf->guess = 0; /* usual */
564     conf->sep = ','; /* default; csv */
565     conf->block = 0; /* default; csv */
566     conf->header = 1; /* usual; LaTeX */
567     conf->escape = 1; /* usual; LaTeX */
568     conf->clrrow = NULL; /* default; LaTeX */
569     conf->red = 0; /* default; LaTeX */
570     conf->longtable = 0; /* default; without package longtable */
571     conf->repeat = 0; /* default; do not repeat the header row */
572     conf->vlines = 1; /* default; draw lines between columns */
573     conf->hlines = 1; /* default; draw lines between rows */
574 ben 1
575     /* TeX charaters to escape */
576 33 conf->tex = (texcape*)malloc(sizeof(texcape));
577 ben 1 conf->tex->tab = "\\_#$%^&{}~";
578 33 conf->tex->size = strlen(conf->tex->tab);
579 ben 1
580 33
581     conf = parseOptions(conf, argc, argv);
582    
583     if (optind == argc) {
584     /* copy stdin into tmp file */
585     char c;
586     fp = tmpfile();
587     while (EOF != (c = getc(stdin))) {
588     putc(c, fp);
589     }
590     rewind(fp);
591     } else {
592     fp = fopen(argv[optind], "r");
593     }
594    
595 ben 1 if (!fp){
596 33 fprintf(stderr, "Can't open file %s\n", argv[optind]);
597 ben 1 exit(EXIT_FAILURE);
598     }
599 33
600 ben 1 if(conf->guess){
601     if(guessCSV(conf, fp)){
602 33 fprintf(stderr, "Please run again by using -- delimiter (if any) and --separator\n");
603 ben 1 exit(EXIT_FAILURE);
604     }
605     rewind(fp);
606     }
607 33
608 ben 1 getMaximums(conf, fp);
609     rewind(fp);
610     doTeXdoc(conf, fp, stdout);
611     free(conf->tex);
612     if (conf->clrrow) free(conf->clrrow); conf->clrrow=NULL;
613     free(conf);
614     fclose(fp);
615 33
616     exit(EXIT_SUCCESS);
617 ben 1 }

  ViewVC Help
Powered by ViewVC 1.1.26