// Copyright 2008 John Maddock
//
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt
// or copy at http://www.boost.org/LICENSE_1_0.txt)

#include "auto_index.hpp"
|
|
|
|
bool need_defaults = true;
|
|
|
|
void install_default_scanners()
|
|
{
|
|
need_defaults = false;
|
|
//
|
|
// Set the default scanners if they're not defined already:
|
|
//
|
|
file_scanner s;
|
|
s.type = "class_name";
|
|
if(file_scanner_set.find(s) == file_scanner_set.end())
|
|
{
|
|
add_file_scanner(
|
|
"class_name", // Index type
|
|
// Header file scanner regex:
|
|
// possibly leading whitespace:
|
|
"^[[:space:]]*"
|
|
// possible template declaration:
|
|
"(template[[:space:]]*<[^;:{]+>[[:space:]]*)?"
|
|
// class or struct:
|
|
"(class|struct)[[:space:]]*"
|
|
// leading declspec macros etc:
|
|
"("
|
|
"\\<\\w+\\>"
|
|
"("
|
|
"[[:blank:]]*\\([^)]*\\)"
|
|
")?"
|
|
"[[:space:]]*"
|
|
")*"
|
|
// the class name
|
|
"(\\<\\w*\\>)[[:space:]]*"
|
|
// template specialisation parameters
|
|
"(<[^;:{]+>)?[[:space:]]*"
|
|
// terminate in { or :
|
|
"(\\{|:[^;\\{()]*\\{)",
|
|
|
|
"(?:class|struct)[^;{]+\\\\<\\5\\\\>[^;{]+\\\\{", // Format string to create indexing regex.
|
|
"\\5", // Format string to create index term.
|
|
"", // Filter regex for section id's.
|
|
"" // Filter regex for filenames.
|
|
);
|
|
}
|
|
|
|
s.type = "typedef_name";
|
|
if(file_scanner_set.find(s) == file_scanner_set.end())
|
|
{
|
|
add_file_scanner(
|
|
"typedef_name", // Index type
|
|
"typedef[^;{}#]+?(\\w+)\\s*;", // scanner regex
|
|
"typedef[^;]+\\\\<\\1\\\\>\\\\s*;", // Format string to create indexing regex.
|
|
"\\1", // Format string to create index term.
|
|
"", // Filter regex for section id's.
|
|
"" // Filter regex for filenames.
|
|
);
|
|
}
|
|
|
|
s.type = "macro_name";
|
|
if(file_scanner_set.find(s) == file_scanner_set.end())
|
|
{
|
|
add_file_scanner(
|
|
"macro_name", // Index type
|
|
"^\\s*#\\s*define\\s+(\\w+)", // scanner regex
|
|
"\\\\<\\1\\\\>", // Format string to create indexing regex.
|
|
"\\1", // Format string to create index term.
|
|
"", // Filter regex for section id's.
|
|
"" // Filter regex for filenames.
|
|
);
|
|
}
|
|
|
|
s.type = "function_name";
|
|
if(file_scanner_set.find(s) == file_scanner_set.end())
|
|
{
|
|
add_file_scanner(
|
|
"function_name", // Index type
|
|
"\\w++(?:\\s*+<[^>]++>)?[\\s&*]+?(\\w+)\\s*(?:BOOST_[[:upper:]_]+\\s*)?\\([^;{}]*\\)\\s*[;{]", // scanner regex
|
|
"\\\\<\\\\w+\\\\>(?:\\\\s+<[^>]*>)*[\\\\s&*]+\\\\<\\1\\\\>\\\\s*\\\\([^;{]*\\\\)", // Format string to create indexing regex.
|
|
"\\1", // Format string to create index term.
|
|
"", // Filter regex for section id's.
|
|
"" // Filter regex for filenames.
|
|
);
|
|
}
|
|
}
|
|
|
|
//
|
|
// Helper to dump file contents into a std::string:
|
|
//
|
|
void load_file(std::string& s, std::istream& is)
|
|
{
|
|
s.erase();
|
|
if(is.bad()) return;
|
|
s.reserve(is.rdbuf()->in_avail());
|
|
char c;
|
|
while(is.get(c))
|
|
{
|
|
if(s.capacity() == s.size())
|
|
s.reserve(s.capacity() * 3);
|
|
s.append(1, c);
|
|
}
|
|
}
|
|
//
// Helper to convert string from external source into valid XML:
//
// Returns a copy of in with the characters & < > and " replaced by the
// entities &amp; &lt; &gt; and &quot; respectively.  Every other character
// (including the apostrophe) is copied through unchanged.
//
std::string escape_to_xml(const std::string& in)
{
   std::string result;
   result.reserve(in.size());   // the result is never shorter than the input
   for(std::string::size_type i = 0; i < in.size(); ++i)
   {
      switch(in[i])
      {
      case '&':
         result.append("&amp;");
         break;
      case '<':
         result.append("&lt;");
         break;
      case '>':
         result.append("&gt;");
         break;
      case '"':
         result.append("&quot;");
         break;
      default:
         result.append(1, in[i]);
         break;
      }
   }
   return result;
}
//
|
|
// Scan a source file for things to index:
|
|
//
|
|
void scan_file(const std::string& file)
|
|
{
|
|
if(need_defaults)
|
|
install_default_scanners();
|
|
if(verbose)
|
|
std::cout << "Scanning file... " << file << std::endl;
|
|
std::string text;
|
|
std::ifstream is(file.c_str());
|
|
if(!is.peek() || !is.good())
|
|
throw std::runtime_error(std::string("Unable to read from file: ") + file);
|
|
load_file(text, is);
|
|
|
|
for(file_scanner_set_type::iterator pscan = file_scanner_set.begin(); pscan != file_scanner_set.end(); ++pscan)
|
|
{
|
|
bool need_debug = false;
|
|
if(!debug.empty() && regex_match(pscan->type, ::debug))
|
|
{
|
|
need_debug = true;
|
|
std::cout << "Processing scanner " << pscan->type << " on file " << file << std::endl;
|
|
std::cout << "Scanner regex:" << pscan->scanner << std::endl;
|
|
std::cout << "Scanner formatter (search regex):" << pscan->format_string << std::endl;
|
|
std::cout << "Scanner formatter (index term):" << pscan->term_formatter << std::endl;
|
|
std::cout << "Scanner file name filter:" << pscan->file_name_filter << std::endl;
|
|
std::cout << "Scanner section id filter:" << pscan->section_filter << std::endl;
|
|
}
|
|
if(!pscan->file_name_filter.empty())
|
|
{
|
|
if(!regex_match(file, pscan->file_name_filter))
|
|
{
|
|
if(need_debug)
|
|
{
|
|
std::cout << "File failed to match file name filter, this file will be skipped..." << std::endl;
|
|
}
|
|
continue; // skip this file
|
|
}
|
|
}
|
|
if(verbose && !need_debug)
|
|
std::cout << "Scanning for type \"" << (*pscan).type << "\" ... " << std::endl;
|
|
boost::sregex_iterator i(text.begin(), text.end(), (*pscan).scanner), j;
|
|
while(i != j)
|
|
{
|
|
try
|
|
{
|
|
index_info info;
|
|
info.term = escape_to_xml(i->format(pscan->term_formatter));
|
|
info.search_text = i->format(pscan->format_string);
|
|
info.category = pscan->type;
|
|
if(!pscan->section_filter.empty())
|
|
info.search_id = pscan->section_filter;
|
|
std::pair<std::set<index_info>::iterator, bool> pos = index_terms.insert(info);
|
|
if(pos.second)
|
|
{
|
|
if(verbose || need_debug)
|
|
std::cout << "Indexing " << info.term << " as type " << info.category << std::endl;
|
|
if(need_debug)
|
|
std::cout << "Search regex will be: \"" << info.search_text << "\"" <<
|
|
" ID constraint is: \"" << info.search_id << "\""
|
|
<< "Found text was: " << i->str() << std::endl;
|
|
if(pos.first->search_text != info.search_text)
|
|
{
|
|
//
|
|
// Merge the search terms:
|
|
//
|
|
const_cast<boost::regex&>(pos.first->search_text) =
|
|
"(?:" + pos.first->search_text.str() + ")|(?:" + info.search_text.str() + ")";
|
|
}
|
|
if(pos.first->search_id != info.search_id)
|
|
{
|
|
//
|
|
// Merge the ID constraints:
|
|
//
|
|
const_cast<boost::regex&>(pos.first->search_id) =
|
|
"(?:" + pos.first->search_id.str() + ")|(?:" + info.search_id.str() + ")";
|
|
}
|
|
}
|
|
}
|
|
catch(const boost::regex_error& e)
|
|
{
|
|
std::cerr << "Unable to create regular expression from found index term:\""
|
|
<< i->format(pscan->term_formatter) << "\" In file " << file << std::endl;
|
|
std::cerr << e.what() << std::endl;
|
|
}
|
|
catch(const std::exception& e)
|
|
{
|
|
std::cerr << "Unable to create index term:\""
|
|
<< i->format(pscan->term_formatter) << "\" In file " << file << std::endl;
|
|
std::cerr << e.what() << std::endl;
|
|
throw;
|
|
}
|
|
++i;
|
|
}
|
|
}
|
|
}
|
|
//
|
|
// Scan a whole directory for files to search:
|
|
//
|
|
void scan_dir(const std::string& dir, const std::string& mask, bool recurse)
|
|
{
|
|
using namespace boost::filesystem;
|
|
boost::regex e(mask);
|
|
directory_iterator i(dir), j;
|
|
|
|
while(i != j)
|
|
{
|
|
if(regex_match(i->path().filename().string(), e))
|
|
{
|
|
scan_file(i->path().string());
|
|
}
|
|
else if(recurse && is_directory(i->status()))
|
|
{
|
|
scan_dir(i->path().string(), mask, recurse);
|
|
}
|
|
++i;
|
|
}
|
|
}
|
|
//
// Remove quotes from a string:
//
// When s is at least two characters long and both starts and ends with a
// double quote, returns s without those two characters; otherwise returns
// s unchanged.  Quotes inside the string are left alone.
//
std::string unquote(const std::string& s)
{
   const std::string::size_type length = s.size();
   const bool quoted = (length >= 2) && (s[0] == '\"') && (s[length - 1] == '\"');
   if(!quoted)
      return s;
   // Drop the first and the last character.
   return s.substr(1, length - 2);
}
//
|
|
// Load and process a script file:
|
|
//
|
|
void process_script(const std::string& script)
|
|
{
|
|
static const boost::regex comment_parser(
|
|
"\\s*(?:#.*)?$"
|
|
);
|
|
static const boost::regex scan_parser(
|
|
"!scan[[:space:]]+"
|
|
"([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s*"
|
|
);
|
|
static const boost::regex scan_dir_parser(
|
|
"!scan-path[[:space:]]+"
|
|
"([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
|
|
"[[:space:]]+"
|
|
"([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
|
|
"(?:"
|
|
"[[:space:]]+"
|
|
"([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
|
|
")?\\s*"
|
|
);
|
|
static const boost::regex entry_parser(
|
|
"([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")"
|
|
"(?:"
|
|
"[[:space:]]+"
|
|
"([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")"
|
|
"(?:"
|
|
"[[:space:]]+"
|
|
"([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")"
|
|
"(?:"
|
|
"[[:space:]]+"
|
|
"([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)*\")"
|
|
")?"
|
|
")?"
|
|
")?"
|
|
"[[:space:]]*");
|
|
static const boost::regex rewrite_parser(
|
|
"!(rewrite-name|rewrite-id)\\s+"
|
|
"([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+"
|
|
"([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s*"
|
|
);
|
|
static const boost::regex debug_parser(
|
|
"!debug\\s+"
|
|
"([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s*"
|
|
);
|
|
static const boost::regex define_scanner_parser(
|
|
"!define-scanner\\s+"
|
|
"([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+" // type, index 1
|
|
"([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+" // scanner regex, index 2
|
|
"([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")\\s+" // format string, index 3
|
|
"([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" // format string for name, index 4
|
|
"(?:"
|
|
"\\s+([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" // id-filter, index 5
|
|
"(?:"
|
|
"\\s+([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")" // filename-filter, index 6
|
|
")?"
|
|
")?"
|
|
"\\s*"
|
|
);
|
|
static const boost::regex error_parser("!.*");
|
|
|
|
if(verbose)
|
|
std::cout << "Processing script " << script << std::endl;
|
|
boost::smatch what;
|
|
std::string line;
|
|
std::ifstream is(script.c_str());
|
|
if(is.bad() || !exists(boost::filesystem::path(script)))
|
|
{
|
|
throw std::runtime_error(std::string("Could not open script file: ") + script);
|
|
}
|
|
while(std::getline(is, line).good())
|
|
{
|
|
if(regex_match(line, what, comment_parser))
|
|
{
|
|
// Nothing to do here...
|
|
}
|
|
else if(regex_match(line, what, scan_parser))
|
|
{
|
|
std::string f = unquote(what[1].str());
|
|
if(!boost::filesystem::path(f).is_complete())
|
|
{
|
|
if(prefix.size())
|
|
{
|
|
boost::filesystem::path base(prefix);
|
|
base /= f;
|
|
f = base.string();
|
|
}
|
|
else
|
|
{
|
|
boost::filesystem::path base(script);
|
|
base.remove_filename();
|
|
base /= f;
|
|
f = base.string();
|
|
}
|
|
}
|
|
if(!exists(boost::filesystem::path(f)))
|
|
throw std::runtime_error("Error the file requested for scanning does not exist: " + f);
|
|
scan_file(f);
|
|
}
|
|
else if(regex_match(line, what, debug_parser))
|
|
{
|
|
debug = unquote(what[1].str());
|
|
}
|
|
else if(regex_match(line, what, define_scanner_parser))
|
|
{
|
|
add_file_scanner(unquote(what.str(1)), unquote(what.str(2)), unquote(what.str(3)),
|
|
unquote(what.str(4)), unquote(what.str(5)), unquote(what.str(6)));
|
|
}
|
|
else if(regex_match(line, what, scan_dir_parser))
|
|
{
|
|
std::string d = unquote(what[1].str());
|
|
std::string m = unquote(what[2].str());
|
|
bool r = unquote(what[3].str()) == "true";
|
|
if(!boost::filesystem::path(d).is_complete())
|
|
{
|
|
if(prefix.size())
|
|
{
|
|
boost::filesystem::path base(prefix);
|
|
base /= d;
|
|
d = base.string();
|
|
}
|
|
else
|
|
{
|
|
boost::filesystem::path base(script);
|
|
base.remove_filename();
|
|
base /= d;
|
|
d = base.string();
|
|
}
|
|
}
|
|
if(verbose)
|
|
std::cout << "Scanning directory " << d << std::endl;
|
|
if(!exists(boost::filesystem::path(d)))
|
|
throw std::runtime_error("Error the path requested for scanning does not exist: " + d);
|
|
scan_dir(d, m, r);
|
|
}
|
|
else if(regex_match(line, what, rewrite_parser))
|
|
{
|
|
bool id = what[1] == "rewrite-id";
|
|
std::string a = unquote(what[2].str());
|
|
std::string b = unquote(what[3].str());
|
|
id_rewrite_list.push_back(id_rewrite_rule(a, b, id));
|
|
}
|
|
else if(line.compare(0, 9, "!exclude ") == 0)
|
|
{
|
|
static const boost::regex delim("([^\"[:space:]]+|\"(?:[^\"\\\\]|\\\\.)+\")");
|
|
boost::sregex_token_iterator i(line.begin() + 9, line.end(), delim, 0), j;
|
|
while(i != j)
|
|
{
|
|
index_info info;
|
|
info.term = escape_to_xml(unquote(*i));
|
|
// Erase all entries that have a category in our scanner set,
|
|
// plus any entry with no category at all:
|
|
index_terms.erase(info);
|
|
for(file_scanner_set_type::iterator pscan = file_scanner_set.begin(); pscan != file_scanner_set.end(); ++pscan)
|
|
{
|
|
info.category = (*pscan).type;
|
|
index_terms.erase(info);
|
|
}
|
|
++i;
|
|
}
|
|
}
|
|
else if(regex_match(line, error_parser))
|
|
{
|
|
std::cerr << "Error: Unable to process line: " << line << std::endl;
|
|
}
|
|
else if(regex_match(line, what, entry_parser))
|
|
{
|
|
try{
|
|
// what[1] is the Index entry
|
|
// what[2] is the regex to search for (optional)
|
|
// what[3] is a section id that must be matched
|
|
// in order for the term to be indexed (optional)
|
|
// what[4] is the index category to place the term in (optional).
|
|
index_info info;
|
|
info.term = escape_to_xml(unquote(what.str(1)));
|
|
std::string s = unquote(what.str(2));
|
|
if(s.size())
|
|
info.search_text = boost::regex(s, boost::regex::icase|boost::regex::perl);
|
|
else
|
|
info.search_text = boost::regex("\\<" + what.str(1) + "\\>", boost::regex::icase|boost::regex::perl);
|
|
|
|
s = unquote(what.str(3));
|
|
if(s.size())
|
|
info.search_id = s;
|
|
if(what[4].matched)
|
|
info.category = unquote(what.str(4));
|
|
std::pair<std::set<index_info>::iterator, bool> pos = index_terms.insert(info);
|
|
if(pos.second)
|
|
{
|
|
if(pos.first->search_text != info.search_text)
|
|
{
|
|
//
|
|
// Merge the search terms:
|
|
//
|
|
const_cast<boost::regex&>(pos.first->search_text) =
|
|
"(?:" + pos.first->search_text.str() + ")|(?:" + info.search_text.str() + ")";
|
|
}
|
|
if(pos.first->search_id != info.search_id)
|
|
{
|
|
//
|
|
// Merge the ID constraints:
|
|
//
|
|
const_cast<boost::regex&>(pos.first->search_id) =
|
|
"(?:" + pos.first->search_id.str() + ")|(?:" + info.search_id.str() + ")";
|
|
}
|
|
}
|
|
}
|
|
catch(const boost::regex_error&)
|
|
{
|
|
std::cerr << "Unable to process regular expression in script line:\n \""
|
|
<< line << "\"" << std::endl;
|
|
throw;
|
|
}
|
|
catch(const std::exception&)
|
|
{
|
|
std::cerr << "Unable to process script line:\n \""
|
|
<< line << "\"" << std::endl;
|
|
throw;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
std::cerr << "Error: Unable to process line: " << line << std::endl;
|
|
}
|
|
}
|
|
}
|
|
|