//----------------------------------------------------------- // Copyright Christian Arnault LAL-Orsay CNRS // arnault@lal.in2p3.fr // See the complete license in cmt_license.txt "http://www.cecill.info". //----------------------------------------------------------- #include "cmt_deps_builder.h" #include "cmt_system.h" #include "cmt_use.h" #include "cmt_include.h" #include "cmt_symbol.h" #include "cmt_log.h" // // While parsing a C++ file, these are the possible usefull // states we can reach. // Each state correspond to a specific state action function. // enum state_def { at_start, // beginning of the file in_line, // along a line in_string, // inside a quoted string in_char, // inside a quoted char in_comment, // inside a multi-line comment in_string_comment, // inside a quoted string in a comment in_char_comment, // inside a quoted char in a comment in_line_comment // inside a single-line comment }; //-------------------------------------------------- static int build_deps (const cmt_string& name, const cmt_string& dir_name, int current_path_index, const CmtSystem::cmt_string_vector& include_paths, const CmtSystem::cmt_string_vector& substitutions, CmtSystem::cmt_string_vector& all_deps, CmtSystem::cmt_string_vector& deps); //-------------------------------------------------- static void header_file_action (const char* header_file, const cmt_string& dir_name, int current_path_index, const CmtSystem::cmt_string_vector& include_paths, const CmtSystem::cmt_string_vector& substitutions, CmtSystem::cmt_string_vector& all_deps, CmtSystem::cmt_string_vector& deps) { Log; bool found = false; for (int i = 0; i < all_deps.size (); i++) { const cmt_string& n = all_deps[i]; log << "CMT> check old header file name=" << n << " against " << header_file << log_endl; if (n == header_file) { found = true; break; } } if (!found) { log << "CMT> parsing new header file name=" << header_file << log_endl; all_deps.push_back (header_file); int path_index = build_deps (header_file, dir_name, current_path_index, include_paths, substitutions, all_deps, deps); if (path_index >= 0) { cmt_string full_name; if (path_index == 1) { full_name = dir_name; full_name += CmtSystem::file_separator (); if (current_path_index >= 2) { full_name.replace (include_paths[current_path_index - 2], substitutions[current_path_index - 2]); } } else if (path_index > 1) { full_name = substitutions[path_index - 2]; full_name += CmtSystem::file_separator (); } full_name += header_file; deps.push_back (full_name); } } } //-------------------------------------------------- static char* at_start_action (char* ptr, state_def& state, const cmt_string& dir_name, int current_path_index, const CmtSystem::cmt_string_vector& include_paths, const CmtSystem::cmt_string_vector& substitutions, CmtSystem::cmt_string_vector& all_deps, CmtSystem::cmt_string_vector& deps) { char term = 0; // To ignore leading spaces and tabs while ( (*ptr == ' ') || (*ptr == '\t')) { ptr++; } if (*ptr == '#') { ptr++; // skip spaces while ((*ptr == ' ') || (*ptr == '\t')) ptr++; if (!strncmp (ptr, "include", 7)) { // We have a #include statement ptr += 7; while ((*ptr == ' ') || (*ptr == '\t')) ptr++; if (*ptr == '<') { term = '>'; ptr++; } else if (*ptr == '"') { term = '"'; ptr++; } else { // empty #include statement?? state = in_line; ptr += strlen (ptr); return (ptr); } } else { // ignore other pre-processor statements state = in_line; ptr += strlen (ptr); return (ptr); } } else if (!strncmp (ptr, " include", 13)) { // fortran include statement ptr += 13; while ((*ptr == ' ') || (*ptr == '\t')) ptr++; if (*ptr == '\'') { term = '\''; ptr++; } else { state = in_line; return (ptr); } } else if (!strncmp (ptr, "\tinclude", 8)) { // fortran include statement ptr += 8; while ((*ptr == ' ') || (*ptr == '\t')) ptr++; if (*ptr == '\'') { term = '\''; ptr++; } else { state = in_line; return (ptr); } } else { state = in_line; return (ptr); } // At this point we do have to include a header file. char* end; end = strchr (ptr, term); if (end != 0) { *end = 0; } const char* header_file = ptr; header_file_action (header_file, dir_name, current_path_index, include_paths, substitutions, all_deps, deps); if (end != 0) { *end = term; } state = in_line; ptr += strlen (ptr); return (ptr); } /** Any line with no header inclusion step through comments and strings */ static char* in_line_action (char* ptr, state_def& state) { char* pattern = ptr + strlen (ptr); int length = 0; /* search for the first occurrence of {single-quote double-quote open-comment open-line-comment} Must exclude escaped quotes \' and \" */ char* pos = strchr (ptr, '"'); if ((pos != 0) && (pos < pattern) && (pos > ptr) && (*(pos-1) != '\\')) { state = in_string; pattern = pos; length = 1; } pos = strchr (ptr, '\''); if ((pos != 0) && (pos < pattern) && (pos > ptr) && (*(pos-1) != '\\')) { state = in_char; pattern = pos; length = 1; } pos = strstr (ptr, "/*"); //*/ if ((pos != 0) && (pos < pattern)) { state = in_comment; pattern = pos; length = 2; } pos = strstr (ptr, "//"); if ((pos != 0) && (pos < pattern)) { state = in_line_comment; pattern = pos; length = 2; } ptr = pattern + length; return (ptr); } //-------------------------------------------------- static char* in_string_action (char* ptr, state_def& state) { // we exclusively look for a double quote char* pos = strchr (ptr, '"'); if (pos == 0) { // This string is not finished till the end of the line.. // we expect it to continue to the next line... // thus we leave the state as it is ptr += strlen (ptr); } else { if ((pos > ptr) && (*(pos - 1) == '\\')) { ptr = pos + 1; } else { ptr = pos + 1; state = in_line; } } return (ptr); } //-------------------------------------------------- static char* in_char_action (char* ptr, state_def& state) { // we exclusively look for a single quote char* pos = strchr (ptr, '\''); if (pos == 0) { // This string is not finished till the end of the line.. // we expect it continues to the nex line... // thus we leave the state as it is ptr += strlen (ptr); } else { if ((pos > ptr) && (*(pos - 1) == '\\')) { ptr = pos + 1; } else { ptr = pos + 1; state = in_line; } } return (ptr); } //-------------------------------------------------- static char* in_comment_action (char* ptr, state_def& state) { char* pattern = ptr + strlen (ptr); int length = 0; char* pos; /* Even if we are inside a comment, we must detect strings since comment markers may be written inside them. pos = strchr (ptr, '"'); if ((pos != 0) && (pos < pattern) && (pos > ptr) && (*(pos-1) != '\\')) { state = in_string_comment; pattern = pos; length = 1; } pos = strchr (ptr, '\''); if ((pos != 0) && (pos < pattern) && (pos > ptr) && (*(pos-1) != '\\')) { state = in_char_comment; pattern = pos; length = 1; } */ pos = strstr (ptr, "*/"); if ((pos != 0) && (pos < pattern)) { state = in_line; pattern = pos; length = 2; } ptr = pattern + length; return (ptr); } //-------------------------------------------------- static char* in_string_comment_action (char* ptr, state_def& state) { char* pos = strchr (ptr, '"'); if (pos == 0) { // This string is not finished till the end of the line.. // we expect it continues to the nex line... ptr += strlen (ptr); } else { if ((pos > ptr) && (*(pos - 1) == '\\')) { ptr = pos + 1; } else { ptr = pos + 1; state = in_comment; } } return (ptr); } //-------------------------------------------------- static char* in_char_comment_action (char* ptr, state_def& state) { char* pos = strchr (ptr, '\''); if (pos == 0) { // This string is not finished till the end of the line.. // we expect it continues to the nex line... ptr += strlen (ptr); } else { if ((pos > ptr) && (*(pos - 1) == '\\')) { ptr = pos + 1; } else { ptr = pos + 1; state = in_comment; } pos--; } return (ptr); } //-------------------------------------------------- static char* in_line_comment_action (char* ptr, state_def& state) { char * pos = strchr (ptr, '\\'); /* Extend this part to deal with continuation character */ if ( (pos == NULL) || ( (ptr + strlen(ptr)-1)!=pos )) { state = in_line; } ptr += strlen (ptr); return (ptr); } //-------------------------------------------------- static void build_deps_text (char* text, const cmt_string& dir_name, int current_path_index, const CmtSystem::cmt_string_vector& include_paths, const CmtSystem::cmt_string_vector& substitutions, CmtSystem::cmt_string_vector& all_deps, CmtSystem::cmt_string_vector& deps) { Log; int pos; int max_pos; int line_number = 1; log << "CMT> build_deps_text dir_name=" << dir_name << log_endl; // erase of continuation character pos = 0; max_pos = strlen (text); char* current = text; char* last = text + max_pos; while (current < last) { char* crnl = strstr (current, "\r\n"); char* nl = strstr (current, "\n"); if ( (crnl==0) && (nl ==0)) break; int length = 0; char * ptr = 0; if (nl==0) //crnl > 0 { length = 3; ptr = crnl; } else if (crnl==0) //nl > 0 { length = 2; ptr = nl; } else if (crnl < nl) { length = 3; ptr = crnl; } else // (crnl > nl) { length = 2; ptr = nl; } strcpy (ptr, ptr+length); current = ptr; last -= length; } pos = 0; max_pos = strlen (text); current = text; last = text + max_pos; state_def state = at_start; while (current < last) { char marker; char* marker_pos = 0; char* crnl = strstr (current, "\r\n"); char* nl = strchr (current, '\n'); char* first = nl; int length = 1; char* ptr = 0; if (crnl != 0) { // cr+nl has been found if (nl == 0) { // cr but no nl ?? first = crnl; length = 2; } else { // both cr+nl and nl found first = (nl < crnl) ? nl : crnl; length = (nl < crnl) ? 1 : 2; } } else { // no cr+nl but nl alone found first = nl; length = 1; } ptr = current; if (first == 0) { // neither nl nor cr+nl found => this is the last line marker_pos = 0; } else { marker_pos = first; marker = *marker_pos; *marker_pos = 0; } log << "CMT> build_deps_text2 line=[" << current << "]" << log_endl; while (strlen (ptr) > 0) { switch (state) { case at_start: ptr = at_start_action (ptr, state, dir_name, current_path_index, include_paths, substitutions, all_deps, deps); break; case in_line: ptr = in_line_action (ptr, state); break; case in_string: ptr = in_string_action (ptr, state); break; case in_char: ptr = in_char_action (ptr, state); break; case in_comment: ptr = in_comment_action (ptr, state); break; case in_string_comment: ptr = in_string_comment_action (ptr, state); break; case in_char_comment: ptr = in_char_comment_action (ptr, state); break; case in_line_comment: ptr = in_line_comment_action (ptr, state); break; } } if (state == in_line) state = at_start; line_number++; if (marker_pos != 0) { *marker_pos = marker; current = marker_pos + length; } else { break; } } } //-------------------------------------------------- static int build_deps (const cmt_string& name, const cmt_string& dir_name, int current_path_index, const CmtSystem::cmt_string_vector& include_paths, const CmtSystem::cmt_string_vector& substitutions, CmtSystem::cmt_string_vector& all_deps, CmtSystem::cmt_string_vector& deps) { Log; int result = -1; cmt_string new_dir; log << "CMT> build_deps name=" << name << " dir_name=" << dir_name << log_endl; // // Return 0 when the file is found in the current directory // if (CmtSystem::test_file (name)) { cmt_string text; text.read (name); char* ptr = &text[0]; CmtSystem::dirname (name, new_dir); build_deps_text (ptr, new_dir, current_path_index, include_paths, substitutions, all_deps, deps); return (0); } cmt_string full_name; full_name = dir_name; full_name += CmtSystem::file_separator (); full_name += name; // // Return 1 when the file is found in the directory of the // upper level source file // if (CmtSystem::test_file (full_name)) { cmt_string text; text.read (full_name); char* ptr = &text[0]; CmtSystem::dirname (full_name, new_dir); build_deps_text (ptr, new_dir, current_path_index, include_paths, substitutions, all_deps, deps); return (1); } int path_index = -1; // // Return [path_index + 2] when the include file is found at one of // the include_paths // for (path_index = 0; path_index < include_paths.size (); path_index++) { full_name = include_paths[path_index]; full_name += CmtSystem::file_separator (); full_name += name; log << "CMT> build_deps2 full_name=" << full_name << log_endl; if (CmtSystem::test_file (full_name)) { cmt_string text; text.read (full_name); char* ptr = &text[0]; CmtSystem::dirname (full_name, new_dir); log << "CMT> build_deps3 new_dir=" << new_dir << log_endl; build_deps_text (ptr, new_dir, path_index + 2, include_paths, substitutions, all_deps, deps); return (path_index + 2); } } log << "CMT> build_deps3" << log_endl; return (-1); } //-------------------------------------------------------------------------- void DepsBuilder::clear () { m_include_paths.clear (); m_substitutions.clear (); } //-------------------------------------------------------------------------- void DepsBuilder::add (const cmt_string& path, const cmt_string& substitution) { if (path[path.size () - 1] == CmtSystem::file_separator ()) { cmt_string p = path; p.erase (path.size () - 1); m_include_paths.push_back (p); } else { m_include_paths.push_back (path); } m_substitutions.push_back (substitution); } //-------------------------------------------------------------------------- void DepsBuilder::add_includes (const Use& use) { Log; const Include::IncludeVector& includes = use.includes; int include_number; for (include_number = 0; include_number < includes.size (); include_number++) { const Include& include = includes[include_number]; cmt_string temp = include.name; cmt_string pattern; cmt_string name; char end_pattern; int start = 0; for (;;) { int begin; begin = temp.find (start, "${"); if (begin != cmt_string::npos) { end_pattern = '}'; } else { begin = temp.find (start, "$("); if (begin != cmt_string::npos) { end_pattern = ')'; } else { break; } } start = begin + 2; int end; end = temp.find (start, end_pattern); if (end == cmt_string::npos) break; if (end < begin) break; start = end + 1; temp.substr (begin, end - begin + 1, pattern); temp.substr (begin + 2, end - begin - 2, name); Symbol* macro = Symbol::find (name); if (macro != 0) { cmt_string value = macro->resolve_macro_value (); value += CmtSystem::file_separator (); temp.replace_all (pattern, value); } else { cmt_string value = CmtSystem::getenv (name); value += CmtSystem::file_separator (); temp.replace_all (pattern, value); } } log << "include = " << temp << log_endl; add (temp, include.name); } } //-------------------------------------------------------------------------- CmtSystem::cmt_string_vector& DepsBuilder::run (const cmt_string& file_name) { Log; log << "Starting deps builder on " << file_name << log_endl; m_deps.clear (); m_all_deps.clear (); cmt_string preprocessor; Symbol* macro = Symbol::find ("preprocessor_command"); if (macro != 0) { preprocessor = macro->resolve_macro_value (); } if (preprocessor == "") { // // Since no preprocessor command is defined, // we use the internal mechanism provided here. // cmt_string new_dir; CmtSystem::dirname (file_name, new_dir); build_deps (file_name, new_dir, 0, m_include_paths, m_substitutions, m_all_deps, m_deps); } else { // // An external preprocessor command is defined. We expect it // to follow a "standard" syntax for its output, ie: // o It starts with: // .o: ... // o There may be many lines with trailing back-slashes // o All entries are space-separated // o One of the entries is the source file name itself // // The preprocessor command expects the list of -I options // (resolved from the "includes" macro) and the list of // -D/-U options (resolved from the "*_pp_*flags" macros) // // // Building the complete command (still the pp_*flags are // missing) // preprocessor += " "; macro = Symbol::find ("includes"); preprocessor += macro->resolve_macro_value (); preprocessor += " "; preprocessor += file_name; cmt_string output; CmtSystem::execute (preprocessor, output); // // Make the output as one single big line. // output.replace_all ("\n", " "); output.replace_all ("\\ ", " "); CmtSystem::cmt_string_vector files; CmtSystem::split (output, " \t", files); // // Analyze each entry // for (int i = 1; i < files.size (); i++) { const cmt_string& file = files[i]; if (file == file_name) continue; cmt_string dir; cmt_string name; cmt_string full_name; CmtSystem::dirname (file, dir); // // Only declared include_paths will be taken into account // Others are considered as system include paths. // for (int j = 0; j < m_include_paths.size (); j++) { const cmt_string& p = m_include_paths[j]; if (dir == p) { CmtSystem::basename (file, name); full_name = m_substitutions[j]; full_name += name; // // We add in the "m_deps" list the symbolic form // of the path rather that the expanded one. // m_deps.push_back (full_name); break; } } } } return (m_deps); }