/*
   Bacula(R) - The Network Backup Solution

   Copyright (C) 2000-2023 Kern Sibbald

   The original author of Bacula is Kern Sibbald, with contributions
   from many others, a complete list can be found in the file AUTHORS.

   You may use this file and others of this release according to the
   license defined in the LICENSE file, which includes the Affero General
   Public License, v3.0 ("AGPLv3") and some additional permissions and
   terms pursuant to its AGPLv3 Section 7.

   This notice must be preserved when any source code is
   conveyed and/or propagated.

   Bacula(R) is a registered trademark of Kern Sibbald.
*/

/* Check for malware in the catalog 
 * Written by Eric Bollengier Apr 2022
 */

#include "bacula.h"
#include "dird.h"

#define dbglvl 50

/* Map the length of a checksum string to the name of its hash algorithm.
 *
 *  len: number of characters in the checksum string
 *
 * return "MD5" for 22 characters, "SHA256" for 43 characters, and NULL
 *        (after a debug trace) for any other length
 */
static const char *hash_get_type(int len)
{
   if (len == 22) {
      return "MD5";
   }
   if (len == 43) {
      return "SHA256";
   }
   Dmsg1(dbglvl, "Unknown hash len %d\n", len);
   return NULL;
}

/* We should run the load operation inside a BEGIN/COMMIT 
 * mode: 0   Skip the load (already loaded)
 *       1   Truncate the table and load the MD5
 *       2   Load the MD5
 *
 * source:   Source of the information
 * fname:    Name of the malware hash list
 *
 * return false with errmsg set if any problem
 */
static bool load_malware_db(JCR *jcr, BDB *db, int mode, const char *source, const char *fname, POOLMEM **errmsg)
{
   POOL_MEM out, tmp, esc, line, esc_source;
   FILE *fp = NULL;
   const char *type=NULL;
   bool ret = false;
   int64_t nb=0;

   if (mode == 0) {             /* Already loaded, nothing to do */
      return true;
   }

   /* TODO: We are going to update the malware database, we need to make sure
    * only one job is doing that work at a time, the next one will just jump
    * over it. Normally it's per catalog.
    */
   Dmsg1(dbglvl, "Load malware database from %s\n", fname);

   /* We open the malware hash database */
   fp = fopen(fname, "r");
   if (!fp) {
      berrno be;
      Mmsg(errmsg, _("[DE0053] Unable to open the Malware Database export %s ERR=%s\n"),
           fname, be.bstrerror());
      goto bail_out;
   }

   /* We will now read the file to insert all records in the catalog.
    * At some point, we might use the batch connection for this work (TODO)
    */
   db_lock(db);
   {
      while (bfgets(out.addr(), fp)) {
         strip_trailing_junk(out.c_str());
         if (out.c_str()[0] == '#') { // Skip comments
            continue;
         }

         /* Look if we can empty the checksum table before to insert data */
         int len = strlen(out.c_str());
         if (!type) {
            type = hash_get_type(len);
            if (!type) {
               continue;
            }
            if (mode == 1) {
               Dmsg0(dbglvl, "Truncate the current malware table\n");
               /* On postgresql, a truncate inside a transaction will disable
                * the WAL logging for the next command and speedup the insertion
                */
               Mmsg(tmp, sql_clear_malware_table[db_get_type_index(db)], type);
               if (!db_sql_query(db, tmp.c_str(), NULL, NULL)) {
                  /* It is not the end of the world if the truncate doesn't work
                   * we will have just too many records, but I'm not expecting a
                   * failure here
                   */
                  Dmsg2(dbglvl, "Unable to execute %s %s\n", tmp.c_str(), db->errmsg);
               }
            }
         }

         /* Hash are usually fine, but the data is coming from outside */
         esc.check_size(2*len+2);
         db_escape_string(jcr, db, esc.c_str(), out.c_str(), len);

         /* We batch the insertion of the checksum to limit the number of SQL queries */
         Mmsg(tmp, "('%s')", esc.c_str());
         if (line.c_str()[0]) {
            pm_strcat(line, ",");
         }
         pm_strcat(line, tmp.c_str());

         if ((nb % 5000) == 0) {
            if ((nb % 100000) == 0) {
               Dmsg0(dbglvl, "Sent 100000 records to the Malware table\n");
            }
            Mmsg(tmp, "INSERT INTO Malware%s (MD5) VALUES %s", type, line.c_str());
            if (!db_sql_query(db, tmp.c_str(), NULL, NULL)) {
               Mmsg(errmsg, "[DE0028] SQL Error %s\n", db->errmsg);
               db_unlock(db);
               goto bail_out;
            }
            pm_strcpy(line, "");
         }
         nb++;
      }

      /* We might still have some checksum to store */
      if (line.c_str()[0]) {
         Mmsg(tmp, "INSERT INTO Malware%s (MD5) VALUES %s", type, line.c_str());
         if (!db_sql_query(db, tmp.c_str(), NULL, NULL)) {
            Mmsg(errmsg, "[DE0028] SQL Error %s\n", db->errmsg);
            db_unlock(db);
            goto bail_out;
         }
      }
      if (type) {
         Mmsg(tmp, "ANALYZE Malware%s", type);
         if (!db_sql_query(db, tmp.c_str(), NULL, NULL)) {
            Dmsg2(dbglvl, "Unable to execute %s %s\n", tmp.c_str(), db->errmsg);
         }
      }
      Dmsg2(dbglvl, "Inserted %lld %s checksums\n", nb, NPRT(type));
   }
   db_unlock(db);
   ret = true;   

bail_out:
   if (fp) {
      fclose(fp);
   }
   return ret;
}

/* Run the external update command and translate its exit status into the
 * load mode to use for the catalog.
 *
 * update_cmd: command line handed to run_program()
 * fname:      receives the output of the command (the command will return
 *             the name of the checksum database file), with trailing junk
 *             stripped; reset to "" on error
 * errmsg:     set when -1 is returned
 *
 * Exit status of the command -> return value:
 *   0     ->  1 : Reset and load the file fname in the catalog
 *   2     ->  2 : Load the file fname in the catalog
 *   1     ->  0 : Nothing to do
 *   other -> -1 : Error while downloading the latest database
 */
static int update_malware_db(JCR *jcr, char *update_cmd, POOLMEM **fname, POOLMEM **errmsg)
{
   Dmsg1(dbglvl, "Updating the malware database via %s\n", update_cmd);

   /* 300 is the second argument of run_program(), presumably a timeout */
   int status = run_program(update_cmd, 300, *fname);
   strip_trailing_junk(*fname);

   if (status == 0 || (status & b_errno_exit)) {
      /* Keep only the exit code of the command */
      status &= ~b_errno_exit;

      switch (status) {
      case 0:
         Dmsg0(dbglvl, "Malware database can be loaded\n");
         return 1;

      case 2:
         Dmsg0(dbglvl, "Malware database can be updated\n");
         return 2;

      case 1:
         Dmsg0(dbglvl, "Malware database is up to date\n");
         return 0;

      default:
         break;                 /* Unexpected exit code, reported below */
      }
   }

   pm_strcpy(fname, "");
   Mmsg(errmsg, _("[DE051] Unable to update the Malware Database ret=%d\n"), status);
   return -1;
}

#if 0
/* The two functions below are compiled out by the enclosing #if 0.
 * Both are empty placeholders that only return true.
 */

/* Placeholder taking a UAContext; no implementation yet */
bool ua_update_malware_db(UAContext *ua)
{

   return true;
}

/* Placeholder taking a list of jobids; no implementation yet */
bool list_malware(JCR *jcr, const char *jobids)
{
   return true;
}
#endif

extern const char *exepath;     // defined in lib/messages.c

/* Check if a given set of jobids has a malware
 *
 * Steps:
 *  1. Read one checksum of the jobs from the File table to determine the
 *     hash type (MD5 or SHA256).
 *  2. Build and run the command that refreshes the malware hash list
 *     (director->get_malwaredb_command, or get_malware_abuse.ch by default).
 *  3. Load the hash list in the Malware<type> table inside a BEGIN/COMMIT.
 *  4. Record every matching file in the FileEvents table and look whether
 *     at least one event of type 'M' exists for the jobs.
 *
 * jobids: comma separated list of JobIds, used as-is in the SQL queries
 * errmsg: always reset at start; filled with an error or information message
 *
 * Return code:
 *   0 - Nothing found
 *  -1 - Error while processing the data (info in errmsg)
 *   1 - Found a malware (info in errmsg)
 */
int check_malware(JCR *jcr, const char *jobids, POOLMEM **errmsg)
{
   POOL_MEM q, fname, source_esc;
   const char *type = NULL;
   const char *source = NULL;
   alist lst(owned_by_alist, 1), *l;
   l =  &lst;
   pm_strcpy(errmsg, "");

#if 0                 // Not working, job record probably not always up to date
   uint32_t i=0;
   /* We fetch the first checksum for the set of jobs to determine the hash type */   
   Mmsg(q, "SELECT SUM(JobFiles) FROM Job WHERE JobId IN (%s)", jobids);
   if (!db_sql_query(jcr->db, q.c_str(), db_int_handler, &i)) {
      Mmsg(errmsg, "[DE0028] SQL Error %s\n", jcr->db->errmsg);
      return -1;
   }
   /* We need at least one checksum to determine the hash type */
   if (i == 0) {
      Mmsg(errmsg, "[DI0052] Nothing to check for JobId %s [%s]\n", jobids, q.c_str());
      Dmsg1(dbglvl, "%s", *errmsg);
      return 0;
   }
#endif

   /* We fetch the first checksum for the set of jobs to determine the hash type */
   Mmsg(q, "SELECT MD5 FROM File "
        "WHERE Filename <> '' AND MD5 <> '0' AND MD5 <> '' AND JobId IN (%s) LIMIT 1", jobids);

   if (!db_sql_query(jcr->db, q.c_str(), db_string_list_handler, &l)) {
      Mmsg(errmsg, "[DE0028] SQL Error %s\n", jcr->db->errmsg);
      return -1;
   }

   /* We need at least one checksum to determine the hash type */
   if (lst.size() != 1) {
      Mmsg(errmsg, "[DE0054] Unable to find a checksum for JobId %s. Use Signature = MD5/SHA256 FileSet option\n", jobids);
      return -1;
   }

   type = hash_get_type(strlen((char *)lst[0]));
   if (!type) {
      Mmsg(errmsg, "[DE0055] Unable to find a valid checksum database for JobId %s. Only signature MD5 and SHA256 are currently supported.\n", jobids);
      return -1;
   }

   Dmsg1(dbglvl, "Found checksum type %s\n", type);

   Mmsg(fname, "%s/malware-%s.dat", working_directory, type);
   // TODO: We should take director and copy it locally, we can have problems with reload
   if (director->get_malwaredb_command) {
      regex_t re;
      regmatch_t pmatch[3];

      /* The source name is what follows "get_malware_" in the command; when
       * the command doesn't follow that naming, we use the whole command.
       */
      source = director->get_malwaredb_command;

      /* REG_EXTENDED is a compile flag: without it "(.+)" is not a group */
      if (regcomp(&re, "get_malware_(.+)$", REG_EXTENDED) != 0) {
         /* re is not usable here, so we must not call regexec()/regfree() */
         Dmsg0(dbglvl, "Unable to compile regex\n");

      } else {
         if (regexec(&re, director->get_malwaredb_command, 2, pmatch, 0) == 0) {
            source = director->get_malwaredb_command + pmatch[1].rm_so;
         }
         regfree(&re);
      }

      /* A command starting with '/' or ':' is used as is, else we look in exepath */
      if (director->get_malwaredb_command[0] == '/' || director->get_malwaredb_command[0] == ':') {
         Mmsg(q, "%s %s %s", director->get_malwaredb_command, type, fname.c_str());

      } else {
         Mmsg(q, "%s/%s %s %s", exepath, director->get_malwaredb_command, type, fname.c_str());
      }

   } else {
      source = "abuse.ch";
      Mmsg(q, "%s/get_malware_%s %s %s", exepath, source, type, fname.c_str());
   }

   /* ret is the load mode: 0 nothing to do, 1 reset and load, 2 load */
   int ret = update_malware_db(jcr, q.c_str(), fname.handle(), errmsg);
   if (ret < 0) {
      return -1;
   }

   db_lock(jcr->db);
   if (!db_sql_query(jcr->db, "BEGIN", NULL, NULL)) {
      Mmsg(errmsg, "[DE0028] SQL Error %s\n", jcr->db->errmsg);
      db_unlock(jcr->db);
      return -1;
   }

   // Small macro to cleanup: COMMIT then unlock, return -1 if the COMMIT fails
#define commit_and_unlock(db) do {                   \
    if (!db_sql_query(db, "COMMIT", NULL, NULL)) {   \
       db_unlock(db);                                \
       return -1;                                    \
    }                                                \
    db_unlock(db);                                   \
 } while (0)

   /* TODO: Here we must load the malware database with a dedicated SQL connection 
    * and a global lock (no need to load the malware database multiple times in //
    */
   if (!load_malware_db(jcr, jcr->db, ret, source, fname.c_str(), errmsg)) {
      commit_and_unlock(jcr->db);
      /* errmsg is a POOLMEM **, the text to print is *errmsg */
      events_send_msg(jcr,
                      "DD0007",
                      EVENTS_TYPE_SECURITY, "*Director*", (intptr_t)jcr,
                      "Unable to update malware hash database from %s. %s", source, *errmsg);
      return -1;
   }

   // TODO: Check if we need to be inside the transaction or not
   commit_and_unlock(jcr->db);

   /* Keep track of important events
    * NOTE(review): this event is also sent when ret == 0 (nothing was loaded)
    */
   events_send_msg(jcr,
                   "DD0006",
                   EVENTS_TYPE_SECURITY, "*Director*", (intptr_t)jcr,
                   "Malware hash database updated from %s", source);

   /* Add the source of information in the table */
   int slen = strlen(source);
   source_esc.check_size(2*slen+2);
   db_escape_string(jcr, jcr->db, source_esc.c_str(), source, slen);
   
   /* We keep track of the infected files in the FileEvents table.
    * The JobId is cast to long to match the %ld conversion.
    */
   Mmsg(q, "INSERT INTO FileEvents (SourceJobId, JobId, FileIndex, Type, Description, Severity, Source) "
        "SELECT %ld, JobId, FileIndex, 'M', 'Malware found', 100, '%s' FROM File JOIN Malware%s USING (MD5) "
        "WHERE JobId IN (%s)", (long)jcr->JobId, source_esc.c_str(), type, jobids);

   if (!db_sql_query(jcr->db, q.c_str(), NULL, NULL)) {
      Mmsg(errmsg, "[DE0028] SQL Error %s\n", jcr->db->errmsg);
      return -1;
   }

   /* Look if at least one malware event exists for these jobs */
   uint32_t nb=0;
   Mmsg(q, "SELECT 1 FROM FileEvents JOIN File USING (JobId, FileIndex) "
        "WHERE FileEvents.JobId IN (%s) AND Type = 'M' LIMIT 1\n", jobids);

   if (!db_sql_query(jcr->db, q.c_str(), db_int_handler, &nb)) {
      Mmsg(errmsg, "[DE0056] Unable to check malware for JobId %s\n", jobids);
      return -1;
   }

   if (nb > 0) {
      Mmsg(errmsg, _("[DE0056] Found malware(s) on JobId %s\n"), jobids);
      return 1;
   }
   /* Leave a message */
   Mmsg(errmsg, _("[DI0050] No known malware reported by \"%s\"\n"), source);
   return 0;
}

#if 0
   char *f;
   lst.destroy();
   Mmsg(q, "SELECT Filename FROM SecurityEvents JOIN File USING (JobId, FileIndex) WHERE SecurityEvents.JobId IN (%s) LIMIT 1000\n", jobids);
   if (!db_sql_query(jcr->db, q.c_str(), db_string_list_handler, &l)) {
      Mmsg(errmsg, "[DE0056] Unable to check malware for JobId %s\n", jobids);
      return false;
   }   
   if (lst.size() > 0) {
      Jmsg(jcr, M_ERROR, 0, _("Found %s %d malware(s) on\n"), (lst.size() == 1000) ? _("more than") : "", lst.size());
      foreach_alist(f, &lst) {
         Jmsg(jcr, M_INFO, 0, "   %s\n", f);
      }
   }
#endif
