A) Identify something that looks like a book-of-the-bible name (case insensitive), followed by whitespace, then a number.
B) Starting with the number after the bible-book name, try to make sense of the numbers, dashes, colons, etc.
PART "A" - The patterns for "A" are regular expressions, as shown in the code block below. Basically, they all look for a character which is not a number (0-9) or letter (a-zA-Z), followed by a fixed string, followed by zero or more spaces, followed by a number (0-9). A period might or might not be after the abbreviated book name. Of course, the pattern list can be amended as necessary.
Code: Select all
struct pattern_s {
char *pattern ;
int book_id ;
#define MODE_BOOK 0x001
#define MODE_CHAPTER 0x002
#define MODE_VERSE 0x004
int mode_submatch ;
regex_t preg[1] ;
} books[] = {
{"[^0-9a-zA-Z]revelation *([0-9]+)" , BOOK_REVELATION , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]rev\.* *([0-9]+)" , BOOK_REVELATION , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]jude *([0-9]+)" , BOOK_JUDE , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]iii *john *([0-9]+)" , BOOK_3JOHN , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]3 *john *([0-9]+)" , BOOK_3JOHN , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ii *john *([0-9]+)" , BOOK_2JOHN , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]2 *john *([0-9]+)" , BOOK_2JOHN , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]i *john *([0-9]+)" , BOOK_1JOHN , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]1 *john *([0-9]+)" , BOOK_1JOHN , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ii *peter *([0-9]+)" , BOOK_2PETER , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]2 *peter *([0-9]+)" , BOOK_2PETER , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]i *peter *([0-9]+)" , BOOK_1PETER , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]1 *peter *([0-9]+)" , BOOK_1PETER , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]james *([0-9]+)" , BOOK_JAMES , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]hebrews *([0-9]+)" , BOOK_HEBREWS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]heb\.* *([0-9]+)" , BOOK_HEBREWS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]philemon *([0-9]+)" , BOOK_PHILEMON , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]phil\.* *([0-9]+)" , BOOK_PHILEMON , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]phil\.* *([0-9]+)" , BOOK_PHILEMON , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]titus *([0-9]+)" , BOOK_TITUS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ii *timothy *([0-9]+)" , BOOK_2TIMOTHY , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ii *tim\.* *([0-9]+)" , BOOK_2TIMOTHY , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]2 *timothy *([0-9]+)" , BOOK_2TIMOTHY , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]2 *tim\.* *([0-9]+)" , BOOK_2TIMOTHY , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]i *timothy *([0-9]+)" , BOOK_1TIMOTHY , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]i *tim\.* *([0-9]+)" , BOOK_1TIMOTHY , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]1 *timothy *([0-9]+)" , BOOK_1TIMOTHY , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]1 *tim\.* *([0-9]+)" , BOOK_1TIMOTHY , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ii *thessalonians *([0-9]+)" , BOOK_2THESSALONIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ii *thess\.* *([0-9]+)" , BOOK_2THESSALONIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ii *thes\.* *([0-9]+)" , BOOK_2THESSALONIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]2 *thessalonians *([0-9]+)" , BOOK_2THESSALONIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]2 *thes\.* *([0-9]+)" , BOOK_2THESSALONIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]i *thessalonians *([0-9]+)" , BOOK_1THESSALONIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]i *thes\.* *([0-9]+)" , BOOK_1THESSALONIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]1 *thessalonians *([0-9]+)" , BOOK_1THESSALONIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]1 *thes\.* *([0-9]+)" , BOOK_1THESSALONIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]colossians *([0-9]+)" , BOOK_COLOSSIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]col\.* *([0-9]+)" , BOOK_COLOSSIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]philippians *([0-9]+)" , BOOK_PHILIPPIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]phil\.* *([0-9]+)" , BOOK_PHILIPPIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ephesians *([0-9]+)" , BOOK_EPHESIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]eph\.* *([0-9]+)" , BOOK_EPHESIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]galatians *([0-9]+)" , BOOK_GALATIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]gal\.* *([0-9]+)" , BOOK_GALATIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ii *corinthians *([0-9]+)" , BOOK_2CORINTHIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ii *cor\.* *([0-9]+)" , BOOK_2CORINTHIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]2 *corinthians *([0-9]+)" , BOOK_2CORINTHIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]2 *cor\.* *([0-9]+)" , BOOK_2CORINTHIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]i *corinthians *([0-9]+)" , BOOK_1CORINTHIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]i *cor\.* *([0-9]+)" , BOOK_1CORINTHIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]1 *corinthians *([0-9]+)" , BOOK_1CORINTHIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]1 *cor\.* *([0-9]+)" , BOOK_1CORINTHIANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]romans *([0-9]+)" , BOOK_ROMANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]rom\.* *([0-9]+)" , BOOK_ROMANS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]acts *([0-9]+)" , BOOK_ACTS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]john *([0-9]+)" , BOOK_JOHN , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]luke *([0-9]+)" , BOOK_LUKE , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]mark *([0-9]+)" , BOOK_MARK , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]matthew *([0-9]+)" , BOOK_MATTHEW , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]matt\.* *([0-9]+)" , BOOK_MATTHEW , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]malachi *([0-9]+)" , BOOK_MALACHI , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]mal\.* *([0-9]+)" , BOOK_MALACHI , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]zechariah *([0-9]+)" , BOOK_ZECHARIAH , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]zech\.* *([0-9]+)" , BOOK_ZECHARIAH , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]haggai *([0-9]+)" , BOOK_HAGGAI , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]hag\.* *([0-9]+)" , BOOK_HAGGAI , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]zephaniah *([0-9]+)" , BOOK_ZEPHANIAH , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]zeph\.* *([0-9]+)" , BOOK_ZEPHANIAH , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]habakkuk *([0-9]+)" , BOOK_HABAKKUK , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]hab\.* *([0-9]+)" , BOOK_HABAKKUK , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]nahum *([0-9]+)" , BOOK_NAHUM , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]micah *([0-9]+)" , BOOK_MICAH , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]jonah *([0-9]+)" , BOOK_JONAH , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]jon\.* *([0-9]+)" , BOOK_JONAH , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]obadiah *([0-9]+)" , BOOK_OBADIAH , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]amos *([0-9]+)" , BOOK_AMOS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]joel *([0-9]+)" , BOOK_JOEL , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]hosea *([0-9]+)" , BOOK_HOSEA , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]daniel *([0-9]+)" , BOOK_DANIEL , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ezekiel *([0-9]+)" , BOOK_EZEKIEL , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]eze\.* *([0-9]+)" , BOOK_EZEKIEL , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]lamentations *([0-9]+)" , BOOK_LAMENTATIONS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]lam\.* *([0-9]+)" , BOOK_LAMENTATIONS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]jeremiah *([0-9]+)" , BOOK_JEREMIAH , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]jer\.* *([0-9]+)" , BOOK_JEREMIAH , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]isaiah *([0-9]+)" , BOOK_ISAIAH , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]isa\.* *([0-9]+)" , BOOK_ISAIAH , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]songofsolomon *([0-9]+)" , BOOK_SONGOFSOLOMON , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]sos\.* *([0-9]+)" , BOOK_SONGOFSOLOMON , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ecclesiastes *([0-9]+)" , BOOK_ECCLESIASTES , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ecc\.* *([0-9]+)" , BOOK_ECCLESIASTES , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]proverbs *([0-9]+)" , BOOK_PROVERBS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]prov\.* *([0-9]+)" , BOOK_PROVERBS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]psalm *([0-9]+)" , BOOK_PSALM , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ps\.* *([0-9]+)" , BOOK_PSALM , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]job *([0-9]+)" , BOOK_JOB , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]esther *([0-9]+)" , BOOK_ESTHER , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]est\.* *([0-9]+)" , BOOK_ESTHER , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]nehemiah *([0-9]+)" , BOOK_NEHEMIAH , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]neh\.* *([0-9]+)" , BOOK_NEHEMIAH , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ezra *([0-9]+)" , BOOK_EZRA , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ii *chronicles *([0-9]+)" , BOOK_2CHRONICLES , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ii *chr\.* *([0-9]+)" , BOOK_2CHRONICLES , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]2 *chronicles *([0-9]+)" , BOOK_2CHRONICLES , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]2 *chr\.* *([0-9]+)" , BOOK_2CHRONICLES , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]i *chronicles *([0-9]+)" , BOOK_1CHRONICLES , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]i *chr\.* *([0-9]+)" , BOOK_1CHRONICLES , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]1 *chronicles *([0-9]+)" , BOOK_1CHRONICLES , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]1 *chr\.* *([0-9]+)" , BOOK_1CHRONICLES , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ii *kings *([0-9]+)" , BOOK_2KINGS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]2 *kings *([0-9]+)" , BOOK_2KINGS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]i *kings *([0-9]+)" , BOOK_1KINGS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]1 *kings *([0-9]+)" , BOOK_1KINGS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ii *samuel *([0-9]+)" , BOOK_2SAMUEL , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ii *sam\.* *([0-9]+)" , BOOK_2SAMUEL , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]2 *samuel *([0-9]+)" , BOOK_2SAMUEL , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]2 *sam\.* *([0-9]+)" , BOOK_2SAMUEL , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]i *samuel *([0-9]+)" , BOOK_1SAMUEL , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]i *sam\.* *([0-9]+)" , BOOK_1SAMUEL , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]1 *samuel *([0-9]+)" , BOOK_1SAMUEL , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]1 *sam\.* *([0-9]+)" , BOOK_1SAMUEL , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]ruth *([0-9]+)" , BOOK_RUTH , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]judges *([0-9]+)" , BOOK_JUDGES , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]jud\.* *([0-9]+)" , BOOK_JUDGES , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]joshua *([0-9]+)" , BOOK_JOSHUA , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]josh\.* *([0-9]+)" , BOOK_JOSHUA , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]deuteronomy *([0-9]+)" , BOOK_DEUTERONOMY , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]deut\.* *([0-9]+)" , BOOK_DEUTERONOMY , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]numbers *([0-9]+)" , BOOK_NUMBERS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]num\.* *([0-9]+)" , BOOK_NUMBERS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]leviticus *([0-9]+)" , BOOK_LEVITICUS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]lev\.* *([0-9]+)" , BOOK_LEVITICUS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]exodus *([0-9]+)" , BOOK_EXODUS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]exe\.* *([0-9]+)" , BOOK_EXODUS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]genesis *([0-9]+)" , BOOK_GENESIS , MODE_BOOK|MODE_CHAPTER ,},
{"[^0-9a-zA-Z]gen\.* *([0-9]+)" , BOOK_GENESIS , MODE_BOOK|MODE_CHAPTER ,},
};
PART "B" tries to parse the stuff following the book name into the form of four integers. If a span is not being referenced, the ending chapter and ending verse are set to zero.
I could post the actual source code, but I don't think it would do you much good. Instead, I'll demonstrate by example: here are the results, each followed by the original string in single quotes. Notice that a single 'string' might produce multiple 'hits'. The Ruth reference is a good example of this, and it also shows how what follows a semicolon is assumed to be a chapter reference, while what follows a colon is assumed to be a verse reference.