Parsing command-line arguments in C
I'm trying to write a program that can compare two files line by line, word by word, or character by character in C. It has to be able to read in command line options -l
, -w
, -i
or --
...
- if the option is
-l
, it compares the files line by line. - if the option is
-w
, it compares the files word by word. - if the option is
--
, it automatically assumes that the next argument is the first filename. - if the option is
-i
, it compares them in a case insensitive manner. - defaults to comparing the files character by character.
It's not supposed to matter how many times the options are input as long as -w
and -l
aren't inputted at the same time and there are no more or less than two files.
I don't even know where to begin with parsing the command line arguments.
So this is the code that I came up with for everything. I haven't error checked it quite yet, but am I writing things in an overcomplicated manner?
/*
* Functions to compare files.
*/
int compare_line();
int compare_word();
int compare_char();
int case_insens();
/*
* Program to compare the information in two files and print message saying
* whether or not this was successful.
*/
int main(int argc, char* argv[])
{
/* Loop counter */
size_t i = 0;
/* Variables for functions */
int caseIns = 0;
int line = 0;
int word = 0;
/* File pointers */
FILE *fp1, *fp2;
/*
* Read through command-line arguments for options.
*/
for (i = 1; i < argc; i++)
{
printf("argv[%u] = %s\n", i, argv[i]);
if (argv[i][0] == '-')
{
if (argv[i][1] == 'i')
{
caseIns = 1;
}
if (argv[i][1] == 'l')
{
line = 1;
}
if (argv[i][1] == 'w')
{
word = 1;
}
if (argv[i][1] == '-')
{
fp1 = argv[i][2];
fp2 = argv[i][3];
}
else
{
printf("Invalid option.");
return 2;
}
}
else
{
fp1(argv[i]);
fp2(argv[i][1]);
}
}
/*
* Check that files can be opened.
*/
if(((fp1 = fopen(fp1, "rb")) == NULL) || ((fp2 = fopen(fp2, "rb")) == NULL))
{
perror("fopen()");
return 3;
}
else
{
if (caseIns == 1)
{
if(line == 1 && word == 1)
{
printf("That is invalid.");
return 2;
}
if(line == 1 && word == 0)
{
if(compare_line(case_insens(fp1, fp2)) == 0)
return 0;
}
if(line == 0 && word == 1)
{
if(compare_word(case_insens(fp1, fp2)) == 0)
return 0;
}
else
{
if(compare_char(case_insens(fp1,fp2)) == 0)
return 0;
}
}
else
{
if(line == 1 && word == 1)
{
printf("That is invalid.");
return 2;
}
if(line == 1 && word == 0)
{
if(compare_line(fp1, fp2) == 0)
return 0;
}
if(line == 0 && word == 1)
{
if(compare_word(fp1, fp2) == 0)
return 0;
}
else
{
if(compare_char(fp1, fp2) == 0)
return 0;
}
}
}
return 1;
if(((fp1 = fclose(fp1)) == NULL) || (((fp2 = fclose(fp2)) == NULL)))
{
perror("fclose()");
return 3;
}
else
{
fp1 = fclose(fp1);
fp2 = fclose(fp2);
}
}
/*
* Function to compare two files line-by-line.
*/
int compare_line(FILE *fp1, FILE *fp2)
{
/* Buffer variables to store the lines in the file */
char buff1 [LINESIZE];
char buff2 [LINESIZE];
/* Check that neither is the end of file */
while((!feof(fp1)) && (!feof(fp2)))
{
/* Go through files line by line */
fgets(buff1, LINESIZE, fp1);
fgets(buff2, LINESIZE, fp2);
}
/* Compare files line by line */
if(strcmp(buff1, buff2) == 0)
{
printf("Files are equal.\n");
return 0;
}
printf("Files are not equal.\n");
return 1;
}
/*
* Function to compare two files word-by-word.
*/
int compare_word(FILE *fp1, FILE *fp2)
{
/* File pointers */
FILE *fp1, *fp2;
/* Arrays to store words */
char fp1words[LINESIZE];
char fp2words[LINESIZE];
if(strtok(fp1, " ") == NULL || strtok(fp2, " ") == NULL)
{
printf("File is empty. Cannot compare.\n");
return 0;
}
else
{
fp1words = strtok(fp1, " ");
fp2words = strtok(fp2, " ");
if(fp1words == fp2words)
{
fputs(fp1words);
fputs(fp2words);
printf("Files are equal.\n");
return 0;
}
}
return 1;
}
/*
* Function to compare two files character by character.
*/
int compare_char(FILE *fp1,FILE *fp2)
{
/* Variables to store the characters from both files */
int c;
int d;
/* Buffer variables to store chars */
char buff1 [LINESIZE];
char buff2 [LINESIZE];
while(((c = fgetc(fp1))!= EOF) && (((d = fgetc(fp2))!=EOF)))
{
if(c == d)
{
if((fscanf(fp1, "%c", buff1)) == (fscanf(fp2, "%c", buff2)))
{
printf("Files have equivalent characters.\n");
return 1;
break;
}
}
}
return 0;
}
/*
* Function to compare two files in a case-insensitive manner.
*/
int case_insens(FILE *fp1, FILE *fp2, size_t n)
{
/* Pointers for files. */
FILE *fp1, *fp2;
/* Variable to go through files. */
size_t i = 0;
/* Arrays to store file information. */
char fp1store[LINESIZE];
char fp2store[LINESIZE];
while(!feof(fp1) && !feof(fp2))
{
for(i = 0; i < n; i++)
{
fscanf(fp1, "%s", fp1store);
fscanf(fp2, "%s", fp2store);
fp1store = tolower(fp1store);
fp2store = tolower(fp2store);
return 1;
}
}
return 0;
}
Solution 1:
To my knowledge, the three most popular ways how to parse command line arguments in C are:
-
Getopt (
#include <unistd.h>
from the POSIX C Library), which can solve simple argument parsing tasks. If you're a bit familiar with bash, the getopt built-in of bash is based on Getopt from the GNU libc. -
Argp (
#include <argp.h>
from the GNU C Library), which can solve more complex tasks and takes care of stuff like, for example:-
-?
,--help
for help message, including email address -
-V
,--version
for version information -
--usage
for usage message
-
- Doing it yourself, which I don't recommend for programs that would be given to somebody else, as there is too much that could go wrong or lower quality. The popular mistake of forgetting about '--' to stop option parsing is just one example.
The GNU C Library documentation has some nice examples for Getopt and Argp.
- http://www.gnu.org/software/libc/manual/html_node/Getopt.html
- http://www.gnu.org/software/libc/manual/html_node/Argp.html
Example for using Getopt
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
int main(int argc, char *argv[])
{
bool isCaseInsensitive = false;
int opt;
enum { CHARACTER_MODE, WORD_MODE, LINE_MODE } mode = CHARACTER_MODE;
while ((opt = getopt(argc, argv, "ilw")) != -1) {
switch (opt) {
case 'i': isCaseInsensitive = true; break;
case 'l': mode = LINE_MODE; break;
case 'w': mode = WORD_MODE; break;
default:
fprintf(stderr, "Usage: %s [-ilw] [file...]\n", argv[0]);
exit(EXIT_FAILURE);
}
}
// Now optind (declared extern int by <unistd.h>) is the index of the first non-option argument.
// If it is >= argc, there were no non-option arguments.
// ...
}
Example for using Argp
#include <argp.h>
#include <stdbool.h>
const char *argp_program_version = "programname programversion";
const char *argp_program_bug_address = "<[email protected]>";
static char doc[] = "Your program description.";
static char args_doc[] = "[FILENAME]...";
static struct argp_option options[] = {
{ "line", 'l', 0, 0, "Compare lines instead of characters."},
{ "word", 'w', 0, 0, "Compare words instead of characters."},
{ "nocase", 'i', 0, 0, "Compare case insensitive instead of case sensitive."},
{ 0 }
};
struct arguments {
enum { CHARACTER_MODE, WORD_MODE, LINE_MODE } mode;
bool isCaseInsensitive;
};
static error_t parse_opt(int key, char *arg, struct argp_state *state) {
struct arguments *arguments = state->input;
switch (key) {
case 'l': arguments->mode = LINE_MODE; break;
case 'w': arguments->mode = WORD_MODE; break;
case 'i': arguments->isCaseInsensitive = true; break;
case ARGP_KEY_ARG: return 0;
default: return ARGP_ERR_UNKNOWN;
}
return 0;
}
static struct argp argp = { options, parse_opt, args_doc, doc, 0, 0, 0 };
int main(int argc, char *argv[])
{
struct arguments arguments;
arguments.mode = CHARACTER_MODE;
arguments.isCaseInsensitive = false;
argp_parse(&argp, argc, argv, 0, 0, &arguments);
// ...
}
Example for Doing it Yourself
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
int main(int argc, char *argv[])
{
bool isCaseInsensitive = false;
enum { CHARACTER_MODE, WORD_MODE, LINE_MODE } mode = CHARACTER_MODE;
size_t optind;
for (optind = 1; optind < argc && argv[optind][0] == '-'; optind++) {
switch (argv[optind][1]) {
case 'i': isCaseInsensitive = true; break;
case 'l': mode = LINE_MODE; break;
case 'w': mode = WORD_MODE; break;
default:
fprintf(stderr, "Usage: %s [-ilw] [file...]\n", argv[0]);
exit(EXIT_FAILURE);
}
}
argv += optind;
// *argv points to the remaining non-option arguments.
// If *argv is NULL, there were no non-option arguments.
// ...
}
Disclaimer: I am new to Argp, the example might contain errors.
Solution 2:
Use getopt()
, or perhaps getopt_long()
.
int iflag = 0;
enum { WORD_MODE, LINE_MODE } op_mode = WORD_MODE; // Default set
int opt;
while ((opt = getopt(argc, argv, "ilw") != -1)
{
switch (opt)
{
case 'i':
iflag = 1;
break;
case 'l':
op_mode = LINE_MODE;
break;
case 'w':
op_mode = WORD_MODE;
break;
default:
fprintf(stderr, "Usage: %s [-ilw] [file ...]\n", argv[0]);
exit(EXIT_FAILURE);
}
}
/* Process file names or stdin */
if (optind >= argc)
process(stdin, "(standard input)", op_mode);
else
{
int i;
for (i = optind; i < argc; i++)
{
FILE *fp = fopen(argv[i], "r");
if (fp == 0)
fprintf(stderr, "%s: failed to open %s (%d %s)\n",
argv[0], argv[i], errno, strerror(errno));
else
{
process(fp, argv[i], op_mode);
fclose(fp);
}
}
}
Note that you need to determine which headers to include (I make it 4 that are required), and the way I wrote the op_mode
type means you have a problem in the function process()
- you can't access the enumeration down there. It's best to move the enumeration outside the function; you might even make op_mode
a file-scope variable without external linkage (a fancy way of saying static
) to avoid passing it to the function. This code does not handle -
as a synonym for standard input, another exercise for the reader. Note that getopt()
automatically takes care of --
to mark the end of options for you.
I've not run any version of the typing above past a compiler; there could be mistakes in it.
For extra credit, write a (library) function:
int filter(int argc, char **argv, int idx, int (*function)(FILE *fp, const char *fn));
which encapsulates the logic for processing file name options after the getopt()
loop. It should handle -
as standard input. Note that using this would indicate that op_mode
should be a static file scope variable. The filter()
function takes argc
, argv
, optind
and a pointer to the processing function. It should return 0 (EXIT_SUCCESS) if it was able to open all the files and all invocations of the function reported 0, otherwise 1 (or EXIT_FAILURE). Having such a function simplifies writing Unix-style 'filter' programs that read files specified on the command line or standard input.
Solution 3:
I've found Gengetopt to be quite useful - you specify the options you want with a simple configuration file, and it generates a .c/.h pair that you simply include and link with your application. The generated code makes use of getopt_long, appears to handle most common sorts of command line parameters, and it can save a lot of time.
A gengetopt input file might look something like this:
version "0.1"
package "myApp"
purpose "Does something useful."
# Options
option "filename" f "Input filename" string required
option "verbose" v "Increase program verbosity" flag off
option "id" i "Data ID" int required
option "value" r "Data value" multiple(1-) int optional
Generating the code is easy and spits out cmdline.h
and cmdline.c
:
$ gengetopt --input=myApp.cmdline --include-getopt
The generated code is easily integrated:
#include <stdio.h>
#include "cmdline.h"
int main(int argc, char ** argv) {
struct gengetopt_args_info ai;
if (cmdline_parser(argc, argv, &ai) != 0) {
exit(1);
}
printf("ai.filename_arg: %s\n", ai.filename_arg);
printf("ai.verbose_flag: %d\n", ai.verbose_flag);
printf("ai.id_arg: %d\n", ai.id_arg);
int i;
for (i = 0; i < ai.value_given; ++i) {
printf("ai.value_arg[%d]: %d\n", i, ai.value_arg[i]);
}
}
If you need to do any extra checking (such as ensuring flags are mutually exclusive), you can do this fairly easily with the data stored in the gengetopt_args_info
struct.
Solution 4:
You can use James Theiler's "opt" package.
And a flattering post with some examples of how it is so much simpler than other approaches is here:
Opt 3.19 review and upgrades
Solution 5:
Docopt has a C implementation that I thought was quite nice:
From a man-page standardized format describing command line options, docopt infers and creates an argument parser. This got started in Python; the Python version literally just parses the docstring and returns a dict. To do this in C takes a little more work, but it's clean to use and has no external dependencies.