/* Trap positions should be flushed *before* the .ev, unfortunately, they might not be changed until after the .ev is already out! Agh. Allow these options: -h disable headers/footers +h enable headers/footers (default) Bottom position should be written in terms of page length, which should be stored in a register. Need -me, -mm, -ms specific stuff. Is there a way of detecting them from within troff? Allow header/footer suppression? (for cases where the code written is just completely wrong.) Need to handle \titlepg. rtf2troff - read rtf input, write troff output Syntax: rtf2troff options file Options: -e echo token information -me write -me specific output -mm write -mm specific output -ms write -ms specific output -s disable strikethrough +s enable strikethrough (default) -t troff specify troff version -u disable underlining +u enable underlining (default) This translator was written to be used with the Primate Center's troff-variant (xroff). xroff dependencies are bracketed within "if (xroff) { ... }" constructions. 29 Jan 91 Paul DuBois dubois@primate.wisc.edu 29 Jan 91 V1.0. Created. 31 Jan 91 Made to work. :-) 27 Feb 91 V1.01. Updated for distribution 1.05. 28 Feb 91 V1.02. Began updating for distribution 1.06. -- troff puts paragraph markers at the beginning of paragraphs, RTF puts 'em at end; the difference is a pain. "Small caps" is done by \s-1 ... \s+1 and capitalizing everything in between. Good underlining is hard to do in troff; each character is underlined individually, which generates an abysmal amount of output. Strikethrough output is even worse. -- Deficiencies are myriad. They include, but are by no means limited to: No formula support. Poor table support. Tabs within cells are botched. Tables are always centered. Tables often come out looking pretty ugly. space before, after and between don't appear to always work. troff has no decimal tabs; they are treated as (ugh) right justified tabs. Vertical lines ("bar tabs") are unsupported. 
Poor font support (only R, I, B, not even I+B). Font table ignored. Line numbering ignored, for no good reason. Only normal and continuous underlining are supported. No outline or shadow, since troff can't do that. All-caps, small-caps doesn't work for non-ASCII characters (> 127) or special (mapped) characters. (Of course, why should it?) Default space between lines should be "auto" - but how to do that? Mixing of leader characters within a paragraph is not supported. Is nTabs = 0 handled incorrectly? Mechanism for handling invisible text is inconsistent. It's hard to do correctly...particularly as not everything is ignored when \v is in effect, and I'm not sure exactly what governs whether something is ignored or not. */ # include # include # include # include "rtf.h" # include "rtf2troff.h" int tvers = TROFF; int mvers = noMacros; State *is; /* current internal state */ DocState *ids; /* internal document state */ SectState *iss; /* internal section state */ ParState *ips; /* internal paragraph state */ CharState *ics; /* internal character state */ State *ws; /* written state */ DocState *wds; /* written document state */ SectState *wss; /* written section state */ ParState *wps; /* written paragraph state */ CharState *wcs; /* written character state */ /* Whether internal state has been changed since written state was last synchronized to it. Initially true so that internal state will be flushed when first content character is written. */ int docStateChanged = 1; /* document properties have changed */ int sectStateChanged = 1; /* section properties have changed */ int parStateChanged = 1; /* paragraph properties have changed */ int charStateChanged = 1; /* character properties have changed */ /* Indirection level for escape (\) processing. Incremented during macro/diversion collection. 
*/ int indirectionLevel = 0; static int haveTables = 0; /* non-zero if any seen */ static TblState tableState; TblState *its = &tableState; int inTable = 0; /* whether in table cell or not */ /* Default output stream */ FILE *f = stdout; static int allowUnderline = 1; static int allowStrikeThru = 1; static char *usage = "Usage: rtf2troff [-e] [-me|-mm|-ms] [-s|+s] [-t troff] [-u|+u] file"; static void TokenEcho (); static void Unknown (); static void Text (); static void Group (); static void Control (); static void BeginDestination (); static void EndDestination (); static void Destination (); static void CharSet (); static void SpecialChar (); static void DocAttr (); static void SectAttr (); static void ParAttr (); static void CharAttr (); static void SetNextTabType (); static void SetNextTabPos (); static void SetTabChar (); int main (argc, argv) int argc; char **argv; { char *troff = "troff"; char *macros = NULL; RTFInit (); /* Process arguments. The -t and -me|-mm|-ms arguments must be remembered and applied *in that order* after argument processing, or char maps may not be selected properly. 
*/ --argc; ++argv; while (argc > 1 && **argv == '-') { if (strcmp ("-e", *argv) == 0) RTFSetReadHook (TokenEcho); else if (strcmp ("-s", *argv) == 0) allowStrikeThru = 0; else if (strcmp ("+s", *argv) == 0) allowStrikeThru = 1; else if (strcmp ("-u", *argv) == 0) allowUnderline = 0; else if (strcmp ("+u", *argv) == 0) allowUnderline = 1; else if (strcmp ("-t", *argv) == 0) { if (argc < 2) { fprintf (stderr, "%s\n", usage); exit (1); } --argc; ++argv; troff = *argv; if (strcmp (troff, "troff") == 0) tvers = TROFF; else if (strcmp (troff, "xroff") == 0) tvers = XROFF; else if (strcmp (troff, "pstroff") == 0) tvers = PSTROFF; } else if (strcmp ("-me", *argv) == 0) { macros = *argv; mvers = meMacros; } else if (strcmp ("-mm", *argv) == 0) { macros = *argv; mvers = mmMacros; } else if (strcmp ("-ms", *argv) == 0) { macros = *argv; mvers = msMacros; } else { fprintf (stderr, "Unknown option: %s\n", *argv); fprintf (stderr, "%s\n", usage); exit (1); } --argc; ++argv; } SelectFormatterMaps (troff); if (macros != NULL) SelectMacPackMaps (macros); /* not clever; only allows stdin or one named file to be read */ if (argc > 0) { if (freopen (argv[0], "r", stdin) == NULL) { fprintf (stderr, "Can't open \"%s\"\n", argv[0]); exit (1); } } /* Install writer callbacks into reader and initialize state (sets up pointers into internal state 0, and equates initial written state to internal state). */ RTFSetClassCallback (rtfText, Text); RTFSetClassCallback (rtfGroup, Group); RTFSetClassCallback (rtfControl, Control); RTFSetClassCallback (rtfUnknown, Unknown); InitState (); /* If macro package needs to be told special characters might be used, do so. */ if (mvers == meMacros) fprintf (f, ".sc\n"); else if (mvers == msMacros) fprintf (f, ".AM\n"); /* Process the input stream. Make sure the first intoken is a "{" so a state push will occur before anything else (need to preserve state 0 intact for section, paragraph, character default restoration). 
*/ (void) RTFGetToken (); if (!RTFCheckCM (rtfGroup, rtfBeginGroup)) { fprintf (stderr, "malformed rtf file - "); fprintf (stderr, "does not begin with \"{\"\n"); exit (1); } RTFRouteToken (); /* send "{" through router */ RTFRead (); /* read everything else */ Flush (); /* some diagnostic stuff */ CheckFinalState (); if (haveTables) fprintf (stderr, "Output contains tables (run through tbl)\n"); exit (0); } /* Token echo function to implement -e */ static void TokenEcho () { fprintf (f, "%d\t%d\t%d\t%d\t\"%s\"\n", rtfClass, rtfMajor, rtfMinor, rtfParam, rtfTextBuf); } /* ---------------------------------------------------------------------- */ /* Token class handlers */ /* Echo any unknown tokens. This helps know where translator needs to be extended. */ static void Unknown () { fprintf (stderr, "Unknown symbol %s\n", rtfTextBuf); } /* Group operator. Push or pop internal state level. Before a pop, check whether the destination needs any shutdown. */ static void Group () { switch (rtfMajor) { case rtfBeginGroup: /* push */ PushIState (); break; case rtfEndGroup: /* pop */ /* do end-of-destination procecssing, then pop state */ if (is->destination != rtfNoDestination) EndDestination (); PopIState (); break; } } /* Have a text char, write it out. Perform special char mapping for chars > 127, do escapes for backslashes. For normal characters, perform to-upper processing. */ static void Text () { char buf[2], *p; if (rtfMajor > 127) /* non-ASCII, map to troff equiv. */ p = CharMapping (rtfMajor); else if (rtfMajor == '\\') /* escape; escape it */ p = "\\e"; else /* regular unmapped, unescaped char */ { if (ics->charStyle & (styleAllCaps | styleSmallCaps)) { /* OK to use islower()/toupper() because char is known to be <= 127 */ if (islower (rtfMajor)) rtfMajor = toupper (rtfMajor); } buf[0] = rtfMajor; buf[1] = '\0'; p = buf; } PutString (p); } /* The char sets, special chars and destinations do not involve a state change; most other control things do. 
*/ static void Control () { switch (rtfMajor) { case rtfCharSet: CharSet (); break; case rtfSpecialChar: SpecialChar (); break; case rtfDestination: BeginDestination (); break; case rtfDocAttr: DocAttr (); break; case rtfSectAttr: SectAttr (); break; case rtfParAttr: ParAttr (); break; case rtfCharAttr: CharAttr (); break; case rtfTblAttr: TblAttr (); break; } } static void CharSet () { SelectCharSetMaps (rtfMinor); } /* The hyphen and dash control things are treated as though they were rtfText here. An extra level of indirection is added to the page number register. */ static void SpecialChar () { char buf[rtfBufSiz]; switch (rtfMinor) { case rtfCurHeadPage: PutString ("\\\\n%"); /* reference page number register */ break; case rtfCurHeadDate: /* unimplemented */ break; case rtfCurHeadTime: /* unimplemented */ break; case rtfNoBrkSpace: PutString ("\\ "); break; case rtfNoReqHyphen: PutString ("\\%"); break; case rtfNoBrkHyphen: PutString ("\\-"); break; case rtfSect: Sect (); break; case rtfRow: /* end of cell, and of row/table */ EndCell (); EndTbl (); break; case rtfLine: Par (); break; case rtfPar: Par (); break; case rtfCell: EndCell (); /* end current cell */ BeginCell (); /* begin next cell */ break; case rtfTab: PutString ("\t"); break; case rtfPage: Par (); fprintf (f, ".bp\n"); break; } } /* Begin/End destination don't try to do anything except dump out comments delimiting the destination. Something "real" should be done, but at least the comment is better than nothing. The switch explicitly lists those destinations for which something intelligent should be done. (But nothing is, yet.) Everything else falls under the default case and is simply skipped anonymously. When a destination is skipped, the "}" is fed back into the router so the group state gets popped by Group(). 
*/ static void BeginDestination () { Destination (1); } static void EndDestination () { Destination (0); } static void Destination (startDest) int startDest; { char *dp = NULL; /* destination name */ char *mp = NULL; /* macro name */ char *rp = NULL; /* register name */ char *sp = NULL; /* skipped destination name */ int reset = 0; /* need reset to defaults? */ /* if beginning destination, set dest type */ if (startDest) is->destination = rtfMinor; /* switch on destination type */ switch (is->destination) { case rtfFootnote: /* Don't skip, but don't start diversion: effect is to leave footnote text in main document body. Incorrect, but better than losing it. Eventually, footnotes should be caught in diversions. */ dp = "footnote"; break; case rtfHeader: dp = "header"; mp = mHeaderAll; rp = rHeaderAll; ++reset; break; case rtfHeaderLeft: dp = "left header"; mp = mHeaderLeft; rp = rHeaderLeft; ++reset; break; case rtfHeaderRight: dp = "right header"; mp = mHeaderRight; rp = rHeaderRight; ++reset; break; case rtfHeaderFirst: dp = "first page header"; mp = mHeaderFirst; rp = rHeaderFirst; ++reset; break; case rtfFooter: dp = "footer"; mp = mFooterAll; rp = rFooterAll; ++reset; break; case rtfFooterLeft: dp = "left footer"; mp = mFooterLeft; rp = rFooterLeft; ++reset; break; case rtfFooterRight: dp = "right footer"; mp = mFooterRight; rp = rFooterRight; ++reset; break; case rtfFooterFirst: dp = "first page footer"; mp = mFooterFirst; rp = rFooterFirst; ++reset; break; case rtfFNSep: sp = "footnote separator"; break; case rtfFNContSep: sp = "continued footnote separator"; break; case rtfFNContNotice: sp = "continued footnote notice"; break; case rtfField: /* don't ignore, but don't capture */ dp = "field"; break; case rtfFieldInst: /* ignore */ sp = "field instruction"; break; case rtfFieldResult: /* don't ignore, but don't capture */ dp = "field result"; break; default: sp = rtfTextBuf; break; } if (dp != NULL && startDest) Comment ("begin %s", dp); if (mp != NULL) /* 
begin a capture macro */ { Flush (); if (startDest) { FlushInitialState (); /* make sure this is out */ FlushSectState (); /* flush trap positions */ if (rp != NULL) /* set a register */ fprintf (f, ".nr %s 1\n", rp); BeginDiversion (mp); if (reset) { /* reset paragraph, char defaults */ /* (fake a \pard and \plain) */ RTFSetToken (rtfControl, rtfParAttr, rtfParDef, -1, "\\pard"); RTFRouteToken (); RTFSetToken (rtfControl, rtfCharAttr, rtfPlain, -1, "\\plain"); RTFRouteToken (); } } else { EndDiversion (); } } if (sp != NULL) /* skip a group */ { if (startDest) { Comment ("SKIPPING %s group", sp); RTFSkipGroup (); RTFRouteToken (); /* feed "}" back into router */ } else Comment ("end skipped group"); } if (dp != NULL && startDest == 0) Comment ("end %s", dp); } static void DocAttr () { double inch = (double) rtfParam / (double) rtfTpi; switch (rtfMinor) { case rtfPaperWidth: ids->pageWidth = inch; break; case rtfPaperHeight: ids->pageHeight = inch; break; case rtfLeftMargin: ids->leftMargin = inch; break; case rtfRightMargin: ids->rightMargin = inch; break; case rtfTopMargin: ids->topMargin = inch; /* pre-emptive strike */ if (iss->headerPos >= ids->topMargin) iss->headerPos = ids->topMargin / 2; break; case rtfBottomMargin: ids->bottomMargin = inch; if (iss->footerPos >= ids->bottomMargin) iss->footerPos = ids->bottomMargin / 2; break; case rtfDefTab: ids->tabWidth = inch; break; case rtfLandscape: ids->landscape = 1; break; } ++docStateChanged; } static void SectAttr () { double inch = (double) rtfParam / (double) rtfTpi; switch (rtfMinor) { case rtfSectDef: RestoreSectDefaults (); break; case rtfNoBreak: case rtfColBreak: case rtfPageBreak: case rtfEvenBreak: case rtfOddBreak: iss->breakType = rtfMinor; break; case rtfPageStarts: iss->pageStart = rtfParam; break; case rtfPageCont: iss->pageRestart = 0; break; case rtfPageRestart: iss->pageRestart = 1; break; case rtfPageDecimal: Flush (); fprintf (f, ".af %% 1\n"); break; case rtfPageURoman: Flush (); fprintf 
(f, ".af %% I\n"); break; case rtfPageLRoman: Flush (); fprintf (f, ".af %% i\n"); break; case rtfPageULetter: Flush (); fprintf (f, ".af %% A\n"); break; case rtfPageLLetter: Flush (); fprintf (f, ".af %% a\n"); break; case rtfPageNumLeft: break; case rtfPageNumTop: break; case rtfHeaderY: iss->headerPos = inch; if (iss->headerPos >= ids->topMargin) iss->headerPos = ids->topMargin / 2; break; case rtfFooterY: iss->footerPos = inch; if (iss->footerPos >= ids->bottomMargin) iss->footerPos = ids->bottomMargin / 2; break; case rtfLineModulus: break; case rtfLineDist: break; case rtfLineStarts: break; case rtfLineRestart: break; case rtfLineRestartPg: break; case rtfLineCont: break; case rtfTopVAlign: break; case rtfBottomVAlign: break; case rtfCenterVAlign: break; case rtfJustVAlign: break; case rtfColumns: break; case rtfColumnSpace: break; case rtfColumnLine: break; case rtfENoteHere: break; case rtfTitleSpecial: iss->titleSpecial = 1; break; } ++sectStateChanged; } /* Paragraph defaults are restored by using the state 0 values, they applying the "Normal" style (style 0). For the rtfStyleNum, the tab flag is reset before expanding the style so any inherited tabs will be overridden by tabs in the style, and reset after expansion so any tabs in the paragraph itself will override inherited or style tabs. The "unimplemented" cases below are those which are currently ignored, but for which something might be done someday, i.e., they're reminders. 
*/

/*
	Paragraph attribute handler.  Updates the internal paragraph
	state (or the tab set, or the table machinery) according to the
	token's minor number, then marks the paragraph state as changed.
*/

static void
ParAttr ()
{
double	inches = (double) rtfParam / (double) rtfTpi;

	switch (rtfMinor)
	{
	case rtfParDef:
		RestoreParDefaults ();
		break;

	case rtfStyleNum:
		ips->tabFlag = 0;	/* let style tabs replace inherited */
		RTFExpandStyle (rtfParam);
		ips->tabFlag = 0;	/* let paragraph tabs replace style's */
		break;

	case rtfQuadLeft:
	case rtfQuadRight:
	case rtfQuadJust:
	case rtfQuadCenter:
		ips->justification = rtfMinor;
		break;

	case rtfFirstIndent:
		ips->firstIndent = inches;
		break;
	case rtfLeftIndent:
		ips->leftIndent = inches;
		break;
	case rtfRightIndent:
		ips->rightIndent = inches;
		break;
	case rtfSpaceBefore:
		ips->spaceBefore = inches;
		break;
	case rtfSpaceAfter:
		ips->spaceAfter = inches;
		break;
	case rtfSpaceBetween:
		ips->spaceBetween = inches;
		break;

	case rtfInTable:
		haveTables = 1;
		/*
			If first cell of row, set temp indent to left edge
			of table.  (Actually, this is done incorrectly;
			tables are always centered.)
			Subsequent cells are begun when \cell is seen.
		*/
		if (its->tableHeader == 0)
		{
			/* first cell; need table prolog */
			BeginTbl ();
			BeginCell ();
		}
		break;

	case rtfTabPos:
		SetNextTabPos (inches);
		break;
	case rtfTabRight:
	case rtfTabCenter:
	case rtfTabDecimal:
		SetNextTabType (rtfMinor);
		break;

	case rtfBorderTop:
		ips->borderFlags |= borderTop;
		break;
	case rtfBorderBottom:
		ips->borderFlags |= borderBottom;
		break;

	case rtfBorderSingle:
	case rtfBorderThick:
	case rtfBorderShadow:
	case rtfBorderDouble:
	case rtfBorderDot:
	case rtfBorderHair:
		ips->borderType = rtfMinor;
		break;

	case rtfLeaderDot:
	case rtfLeaderHyphen:
	case rtfLeaderUnder:
	case rtfLeaderThick:
		SetTabChar (rtfMinor);
		break;

	/* unimplemented */
	case rtfNoLineNum:
	case rtfTabBar:
	case rtfBorderLeft:
	case rtfBorderRight:
	case rtfBorderBar:
	case rtfBorderBox:
	case rtfBorderBetween:
	case rtfBorderSpace:
		break;
	}
	++parStateChanged;
}


/*
	Several of the attributes can be turned off with param value of
	zero (e.g., \b vs.
\b0), but since the value of rtfParam is 0 if no param is given, test the text of the token directly. to find out if there's a zero at the end of it. \plain is like \pard but for characters, i.e, it restores all character defaults. */ static void CharAttr () { int turnOn = (rtfTextBuf[rtfTextLen-1] != '0'); switch (rtfMinor) { case rtfPlain: RestoreCharDefaults (); break; case rtfBold: if (turnOn) ics->charStyle |= styleBold; else ics->charStyle &= ~styleBold; break; case rtfItalic: if (turnOn) ics->charStyle |= styleItalic; else ics->charStyle &= ~styleItalic; break; case rtfStrikeThru: if (allowStrikeThru) { if (turnOn) ics->charStyle |= styleStrikeThru; else ics->charStyle &= ~styleStrikeThru; } break; case rtfOutline: if (turnOn) ics->charStyle |= styleOutline; else ics->charStyle &= ~styleOutline; break; case rtfShadow: if (turnOn) ics->charStyle |= styleShadow; else ics->charStyle &= ~styleShadow; break; case rtfSmallCaps: if (turnOn) ics->charStyle |= styleSmallCaps; else ics->charStyle &= ~styleSmallCaps; break; case rtfAllCaps: if (turnOn) ics->charStyle |= styleAllCaps; else ics->charStyle &= ~styleAllCaps; break; case rtfInvisible: if (turnOn) ics->charStyle |= styleInvisible; else ics->charStyle &= ~styleInvisible; break; case rtfFontNum: /* unimplemented */ break; case rtfFontSize: /* sizes are in half-points, convert to whole points */ ics->fontSize = (int) (rtfParam / 2); if (ics->fontSize <= 0) ++ics->fontSize; /* don't play with fire */ break; case rtfExpand: /* unimplemented */ break; case rtfUnderline: if (allowUnderline) { if (turnOn) ics->charStyle |= styleUnderline; else ics->charStyle &= ~styleUnderline; } break; case rtfWUnderline: if (allowUnderline) { if (turnOn) ics->charStyle |= styleWUnderline; else ics->charStyle &= ~styleWUnderline; } break; case rtfDUnderline: /* unimplemented */ break; case rtfDbUnderline: /* unimplemented */ break; case rtfNoUnderline: ics->charStyle &= ~styleUnderline; ics->charStyle &= ~styleWUnderline; break; 
case rtfSuperScript: /* superscripts are in half-points, convert to points */ ics->superScript = rtfParam / 2; break; case rtfSubScript: /* subscripts are in half-points, convert to points */ ics->subScript = rtfParam / 2; break; case rtfRevised: /* unimplemented */ break; case rtfForeColor: /* unimplemented */ break; case rtfBackColor: /* unimplemented */ break; } ++charStateChanged; } /* ---------------------------------------------------------------------- */ /* Tab handling routines */ void InitTabSet () { int i; ips->nTabs = 0; /* Set all tabs to be left-justified; that will then be used if no other tab type is specified. This is done because the position is specified *after* the type. */ for (i = 0; i < maxTab; i++) ips->tabType[i] = rtfTabLeft; } static void SetNextTabPos (pos) double pos; { if (ips->tabFlag != 0 && ips->nTabs >= maxTab) fprintf (stderr, "maximum tabstop count (%d) exceeded\n", maxTab); else { /* if no tab info has been set for this state, reinit them */ if (ips->tabFlag == 0) { InitTabSet (); ips->tabFlag = 1; } ips->tab[ips->nTabs++] = pos; } } /* Tab types are specified *before* the position to which they apply is given, so set the next available slot in anticipation of the position's being specified next. */ static void SetNextTabType (type) int type; { if (ips->tabFlag != 0 && ips->nTabs >= maxTab) fprintf (stderr, "maximum tabstop count (%d) exceeded\n", maxTab); else { /* if no tab info has been set for this state, reinit them */ if (ips->tabFlag == 0) { InitTabSet (); ips->tabFlag = 1; } ips->tabType[ips->nTabs] = type; } } static void SetTabChar (leader) int leader; { if (ips->tabFlag != 0 && ips->nTabs >= maxTab) fprintf (stderr, "maximum tabstop count (%d) exceeded\n", maxTab); else { /* if no tab info has been set for this state, reinit them */ if (ips->tabFlag == 0) { InitTabSet (); ips->tabFlag = 1; } ips->tabChar = rtfMinor; } }