// // CX. Produce a concordance table of names in C-ish source files. // // Copyright (C) 2001 James Moen. // // This program is free software; you can redistribute it and/or modify it // under the terms of the GNU General Public License as published by the Free // Software Foundation; either version 2 of the License, or (at your option) // any later version. // // This program is distributed in the hope that it will be useful, but WITHOUT // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for // more details. // // You should have received a copy of the GNU General Public License along // with this program; if not, write to the Free Software Foundation, Inc., 59 // Temple Place, Suite 330, Boston, MA 02111-1307 USA. // // // The command // // cx file1 file2 ... fileN // // lists the named source files to standard output. Following the list will be // a concordance table of the names in these files. CX is known to work for C, // C++, and Java source files. It might work for source files written in other // C-like languages too. // // If you have questions or comments about CX, write to moen@augsburg.edu. // #include #include #include #define apostrapheChar '\'' // These delimit char constants. #define blankChar ' ' // A blank. Duh. #define eofChar EOF // End of file char. #define eolChar '\n' // End of line char. #define eopChar '\f' // End of page char. #define eosChar '\0' // End of string char. #define escapeChar '\\' // These prefix chars in strings. #define false 0x00 // A BOOL constant. #define F 0x00 // A BOOL constant. #define lineNumberDigits 5 // Digits in a line number. #define maxNameLength 32 // Chars in a name. #define maxNumbersOnLine 9 // This gives about 80-char lines. #define nil NULL // The null pointer. #define quoteChar '"' // These delimit string constants. #define slashChar '/' // This might start a comment. #define starChar '*' // This might follow SLASH CHAR. #define true 0xFF // Another BOOL constant. #define T 0xFF // Another BOOL constant. typedef char bool; // A fake Boolean type. typedef FILE *refFile; // Ref to file. typedef struct lineNodeStruct lineNode; // A LINE NODE. typedef struct nameNodeStruct nameNode; // A NAME NODE. typedef char *refChar; // Ref to char. typedef char **refRefChar; // Ref to ref to char. typedef struct lineNodeStruct *refLineNode; // Ref to LINE NODE. typedef struct nameNodeStruct *refNameNode; // Ref to NAME NODE. typedef char nameString[maxNameLength]; // A name. // LINE NODE STRUCT. Used to make linked queues of line numbers. struct lineNodeStruct { long number; refLineNode nextLine; }; // NAME NODE STRUCT. Record information about the name NAME. The appearances // of NAME are kept in a linked queue whose ends are FIRST LINE and LAST LINE. // NAME NODE STRUCTs themselves are organized into an unbalanced binary search // tree via LEFT SUBTREE and RIGHT SUBTREE. struct nameNodeStruct { nameString name; refLineNode firstLine; refLineNode lastLine; refNameNode leftSubtree; refNameNode rightSubtree; }; char ch; // Most recent char from SOURCE. nameString emptyName; // A name full of EOS CHARs. long lineNumber; // Most recent line number from SOURCE. nameString name; // Most recent name from SOURCE. refNameNode nameTree; // Root of the binary search tree. refFile source; // Most recent source file. // NEW NAME SUBTREE. Make a new search tree node that contains NAME. refNameNode newNameSubtree(nameString name) { refNameNode newSubtree = malloc(sizeof(nameNode)); memcpy(newSubtree->name, name, maxNameLength); newSubtree->firstLine = nil; newSubtree->lastLine = nil; newSubtree->leftSubtree = nil; newSubtree->rightSubtree = nil; return newSubtree; } // NEW LINE NODE. Make a new line number node that conatains NUMBER. refLineNode newLineNode(long number) { refLineNode newNode = malloc(sizeof(lineNode)); newNode->number = number; newNode->nextLine = nil; return newNode; } // RECORD NAME. Add the name NAME and the current line number LINE NUMBER to // the search tree. We traverse the search tree, looking for a SUBTREE whose // root contains NAME. If there is no such subtree, we make a new one. Then we // add LINE NUMBER to SUBTREE's queue. void recordName() { refNameNode subtree = nameTree; while (true) { int order = memcmp(subtree->name, name, maxNameLength); if (order > 0) { if (subtree->leftSubtree == nil) { subtree->leftSubtree = newNameSubtree(name); subtree = subtree->leftSubtree; break; } else { subtree = subtree->leftSubtree; }} else if (order < 0) { if (subtree->rightSubtree == nil) { subtree->rightSubtree = newNameSubtree(name); subtree = subtree->rightSubtree; break; } else { subtree = subtree->rightSubtree; }} else { break; }} if (subtree->firstLine == nil) { refLineNode newNode = newLineNode(lineNumber); subtree->firstLine = newNode; subtree->lastLine = newNode; } else if (subtree->lastLine->number != lineNumber) { refLineNode newNode = newLineNode(lineNumber); subtree->lastLine->nextLine = newNode; subtree->lastLine = newNode; }} // WRITE APPEARANCES. Write the NAME at SUBTREE, the root of a subtree in the // binary search tree. Then dequeue the line numbers at SUBTREE one at a time // and write them as we go. void writeAppearances(refNameNode subtree) { refChar index = subtree->name; refLineNode line = subtree->firstLine; int numbersOnLine = 0; while (index < subtree->name + maxNameLength) { if (index[0] == eosChar) { putc(blankChar, stdout); } else { putc(index[0], stdout); } index += 1; } while (line != nil) { if (numbersOnLine == maxNumbersOnLine) { numbersOnLine = 0; fprintf(stdout, "\n%*s", maxNameLength, " "); } fprintf(stdout, " %0*i", lineNumberDigits, line->number); numbersOnLine += 1; line = line->nextLine; } if (numbersOnLine > 0) { putc(eolChar, stdout); }} // TRAVERSE IN ORDER. Traverse SUBTREE in inorder, writing the names and the // line numbers at each root as we go. void traverseInOrder(refNameNode subtree) { while (subtree != nil) { traverseInOrder(subtree->leftSubtree); writeAppearances(subtree); subtree = subtree->rightSubtree; }} // IS NAME CHAR. Test if the character CH can appear in a name. We simply look // it up in the table NAME CHARS. bool nameChars[256] = { F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, F, F, F, F, F, F, F, T, T, T, T, T, T, T, T, T, T, F, F, F, F, F, F, F, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, F, F, F, T, F, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, F, F, F, F }; bool isNameChar(char ch) { return nameChars[ch + 128]; } // NEXT CHAR. Write the most recent character from SOURCE into standard output // and then read the next character. void nextChar() { putc(ch, stdout); ch = getc(source); } // NEXT COMMENT. Read the next comment from SOURCE. If it's a comment starting // with "//" we read chars until we get an EOL CHAR. If it's one starting with // "/*", we read chars until we get "*/". void nextComment() { nextChar(); if (ch == slashChar) { while (ch != eofChar && ch != eolChar) { nextChar(); }} else if (ch == starChar) { nextChar(); while (ch != eofChar) { if (ch == eolChar) { nextChar(); lineNumber += 1; fprintf(stdout, "%0*i ", lineNumberDigits, lineNumber); } else if (ch == starChar) { nextChar(); if (ch == slashChar) { nextChar(); return; }} else { nextChar(); }}}} // NEXT NUMBER. Read the next series of digits and letters from SOURCE. void nextNumber() { while (isNameChar(ch)) { nextChar(); }} // NEXT NAME. Read the next name from SOURCE and copy it into NAME. void nextName() { refChar index = name; memcpy(name, emptyName, maxNameLength); while (isNameChar(ch)) { if (index < name + maxNameLength) { index[0] = ch; index += 1; } nextChar(); }} // NEXT STRING. Read the next character or string from SOURCE. Whatever it is, // it begins and ends with DELIMITER. void nextString() { char delimiter = ch; nextChar(); while (ch != eofChar && ch != eolChar && ch != delimiter) { if (ch == escapeChar) { nextChar(); } nextChar(); } if (ch == delimiter) { nextChar(); }} // MAIN. First check if every file mentioned on the command line exists, and // abort with an error message if one does not. If every file exists, then we // read names from each file in sequence, adding them and their line numbers // to the search tree. Copy the files to standard output as we go. Finally we // traverse the search tree to write names (along with their line numbers) in // lexicographic order. int main(int argc, refRefChar argv) { refRefChar index; index = argv + 1; while (index < argv + argc) { source = fopen(index[0], "r"); if (source == nil) { fprintf(stderr, "%s: cannot open %s\n", argv[0], index[0]); return 1; } else { fclose(source); } index += 1; } lineNumber = 0; memset(emptyName, eosChar, maxNameLength); nameTree = newNameSubtree(emptyName); index = argv + 1; while (index < argv + argc) { source = fopen(index[0], "r"); ch = getc(source); while (ch != eofChar) { lineNumber += 1; fprintf(stdout, "%0*i ", lineNumberDigits, lineNumber); while (ch != eofChar && ch != eolChar) { if ('0' <= ch && ch <= '9') { nextNumber(); } else if (isNameChar(ch)) { nextName(); recordName(); } else if (ch == apostrapheChar || ch == quoteChar) { nextString(); } else if (ch == slashChar) { nextComment(); } else { nextChar(); }} if (ch == eolChar) { nextChar(); }} putc(eopChar, stdout); fclose(source); index += 1; } traverseInOrder(nameTree->leftSubtree); traverseInOrder(nameTree->rightSubtree); return 0; }