#!/usr/bin/env ruby

#    Copyright 2013 Mirantis, Inc.
#
#    Licensed under the Apache License, Version 2.0 (the "License"); you may
#    not use this file except in compliance with the License. You may obtain
#    a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
#    Unless required by applicable law or agreed to in writing, software
#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
#    License for the specific language governing permissions and limitations
#    under the License.

begin
  require 'rubygems'
rescue LoadError
end
require 'ohai/system'
require 'json'
require 'httpclient'
require 'logger'
require 'optparse'
require 'yaml'
require 'ipaddr'
require 'rethtool'
require 'digest'

unless Process.euid == 0
  puts "You must be root"
  exit 1
end

ENV['PATH'] = "/bin:/usr/bin:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin"

AGENT_CONFIG = "/etc/nailgun-agent/config.yaml"

# look at https://github.com/torvalds/linux/blob/master/Documentation/devices.txt
# KVM virtio volumes has code 252 in CentOS, but 253 in Ubuntu
# Please also update the device codes here
# https://github.com/stackforge/fuel-astute/blob/master/mcagents/erase_node.rb#L81
STORAGE_CODES = [3, 8, 65, 66, 67, 68, 69, 70, 71, 104, 105, 106, 107, 108, 109, 110, 111, 202, 252, 253]
REMOVABLE_VENDORS = [
  "Adaptec", "IBM", "ServeRA",
]

def digest(body)
  if body.is_a? Hash
    digest body.map { |k,v| [digest(k),digest(v)].join("=>") }.sort
  elsif body.is_a? Array
    body.map{ |v| digest v }.join('|')
  else
    [body.class.to_s, body.to_s].join(":")
  end
end

def createsig(body)
  Digest::SHA1.hexdigest( digest body )
end

class McollectiveConfig
  def initialize(logger)
    @logger = logger
    @configfile = '/etc/mcollective/server.cfg'
  end

  def get_config_by_key(find_key)
    found_key = nil
    found_value = nil
    # This code is from mcollective's sources
    File.open(@configfile, "r").each do |line|
      # strip blank spaces, tabs etc off the end of all lines
      line.gsub!(/\s*$/, "")
      unless line =~ /^#|^$/
        if line =~ /(.+?)\s*=\s*(.+)/
          key = $1
          val = $2
          if key == find_key
            found_key = key
            found_value = val
          end
        end
      end
    end

    found_value if found_key
  end

  def replace_identity(new_id)
    # check if id complies reqs
    raise 'Identities can only match /\w\.\-/' unless new_id.to_s.match(/^[\w\.\-]+$/)

    value_from_config = get_config_by_key('identity')

    if value_from_config == new_id.to_s
      @logger.info "MCollective is up to date with identity = #{new_id}"
    else
      config = File.open(@configfile, "rb").read
      if value_from_config
        # Key found, but it has other value
        @logger.info "Replacing identity in mcollective server.cfg to new value = '#{new_id}'"
        config.gsub!(/^identity[ =].*$/, "identity = #{new_id}")
        File.open(@configfile, "w") { |f| f.write(config) }
      else # if key was not found
        config += "\nidentity = #{new_id}\n"
        @logger.info "Identity in mcollective server.cfg has not been found. Setting to '#{new_id}'"
        File.open(@configfile, "w") { |f| f.write(config) }
      end
      # Generally because generic init script for
      # mcollective is broken at least in Ubuntu and
      # altering restart in such way seems better
      # than shipping our own package.
      puts `/etc/init.d/mcollective stop; /etc/init.d/mcollective start`
    end
  end
end


class NodeAgent
  def initialize(logger, url=nil)
    @logger = logger

    @api_default_address = "localhost"
    @api_default_port = "8000"

    @api_url = url

    if @api_url
      @api_url.chomp!('/')
      @api_ip = @api_url.match(/\bhttp:\/\/((\d{1,3}\.){3}\d{1,3})/)[1]
    else
      begin
        cmdline = ::File.read("/proc/cmdline")
        @api_ip = cmdline.match(/\burl=http:\/\/((\d{1,3}\.){3}\d{1,3})/)[1]
        @logger.info("Found admin node IP address in kernel cmdline: #{@api_ip}")
      rescue
        @logger.info("Can't get API url from /proc/cmdline. Will use localhost.")
        @api_ip = "127.0.0.1"
      end
      @api_url = "http://#{@api_ip}:#{@api_default_port}/api"
    end

    @os = Ohai::System.new()
    @os.all_plugins
  end

  def put
    headers = {"Content-Type" => "application/json"}
    @logger.debug("Trying to put host info into #{@api_url}")
    res = htclient.put("#{@api_url}/nodes/agent/", _data.to_json, headers)
    @logger.debug("Response: status: #{res.status} body: #{res.body}")
    if res.status < 200 or res.status >= 400
      @logger.error("HTTP PUT failed: #{res.inspect}")
    end
    res
  end

  def post
    headers = {"Content-Type" => "application/json"}
    @logger.debug("Trying to create host using #{@api_url}")
    res = htclient.post("#{@api_url}/nodes/", _data.to_json, headers)
    @logger.debug("Response: status: #{res.status} body: #{res.body}")
    res
  end

  def htclient
    client = HTTPClient.new
    client.connect_timeout = 10
    client.send_timeout    = 10
    client.receive_timeout = 10  # (mihgen): Nailgun may hang for a while, but 10sec should be enough for him to respond
    client
  end

  def _interfaces
    interfaces = @os[:network][:interfaces].inject([]) do |result, elm|
      result << { :name => elm[0], :addresses => elm[1]["addresses"] }
    end
    interfaces << { "default_interface" => @os["network"]["default_interface"] }
    interfaces << { "default_gateway"   => @os["network"]["default_gateway"] }
    interfaces
  end

  def _detailed
    detailed_meta = {
      :system => _system_info,
      :interfaces => [],
      :cpu => {
        :total => (@os[:cpu][:total].to_i rescue nil),
        :real => (@os[:cpu][:real].to_i rescue nil),
        :spec => [],
      },
      :disks => [],
      :memory => (_dmi_memory or _ohai_memory),
    }

    begin
      (@os[:network][:interfaces] or {} rescue {}).each do |int, intinfo|
        # Send info about physical interfaces only
        next if intinfo[:type] !~ /^eth.*/
        # Exception: eth0.0(example) have "type" => "eth" but it is not physical interface
        next if int =~ /\d+\.\d+$/ or int =~ /vlan\d+$/
        # Remove interfaces like eth0.101-hapr, eth1-hapr
        next if int =~ /\d+-.+/

        int_meta = {:name => int}
        int_meta[:state] = intinfo[:state]
        (intinfo[:addresses] or {} rescue {}).each do |addr, addrinfo|
          if (addrinfo[:family] rescue nil) =~ /lladdr/
            int_meta[:mac] = addr
            begin
              int_info = Rethtool::InterfaceSettings.new(int)
              int_meta[:max_speed] = int_info.best_mode.speed
              if int_info.current_mode.speed == :unknown
                int_meta[:current_speed] = nil
              else
                int_meta[:current_speed] = int_info.current_mode.speed
              end
            rescue
              int_meta[:current_speed] = nil
            end
          elsif (addrinfo[:family] rescue nil) =~ /^inet$/
            int_meta[:ip] = addr
            int_meta[:netmask] = addrinfo[:netmask] if addrinfo[:netmask]
          end
        end
        detailed_meta[:interfaces] << int_meta
      end
    rescue Exception => e
      @logger.error("Error '#{e.message}' in gathering interfaces metadata: #{e.backtrace}")
    end

    begin
      (@os[:cpu] or {} rescue {}).each do |cpu, cpuinfo|
        if cpu =~ /^[\d]+/ and cpuinfo
          frequency = cpuinfo[:mhz].to_i rescue nil
          begin
            # ohai returns current frequency, try to get max if possible
            max_frequency = `cat /sys/devices/system/cpu/cpu#{cpu}/cpufreq/cpuinfo_max_freq 2>/dev/null`.to_i / 1000
            frequency = max_frequency if max_frequency > 0
          rescue
          end
          detailed_meta[:cpu][:spec] << {
            :frequency => frequency,
            :model => (cpuinfo[:model_name].gsub(/ +/, " ") rescue nil)
          }
        end
      end
    rescue Exception => e
      @logger.error("Error '#{e.message}' in gathering cpu metadata: #{e.backtrace}")
    end

    begin
      @logger.debug("Trying to find block devices")
      (@os[:block_device] or {} rescue {}).each do |bname, binfo|
        @logger.debug("Found block device: #{bname}")
        @logger.debug("Block device info: #{binfo.inspect}")
        if physical_data_storage_devices.map{|d| d[:name]}.include?(bname) && binfo
          @logger.debug("Block device seems to be physical data storage: #{bname}")
          block = physical_data_storage_devices.select{|d| d[:name] == bname}[0]
          if block[:removable] =~ /^1$/ && ! REMOVABLE_VENDORS.include?(binfo[:vendor])
            next
          end
          dname = bname.gsub(/!/, '/')

          # 512 bytes is the size of one sector by default
          block_size = 512
          fn = "/sys/block/#{bname}/queue/logical_block_size"
          block_size = File.read(fn).to_i if File.exist? fn
          block_size = 512 if block_size == 0
          detailed_meta[:disks] << {
            :name => dname,
            :model => binfo[:model],
            :size => (binfo[:size].to_i * block_size),
            :disk => block[:disk],
            :extra => block[:extra],
            :removable => block[:removable]
          }
        end
      end
      @logger.debug("Detailed meta disks: #{detailed_meta[:disks].inspect}")
    rescue Exception => e
      @logger.error("Error '#{e.message}' in gathering disks metadata: #{e.backtrace}")
    end

    detailed_meta
  end

  def _disk_id_by_name(name)
    dn = "/dev/disk/by-id"
    basepath = Dir["#{dn}/**?"].select{|f| /\/#{name}$/.match(File.readlink(f))}
    basepath.map{|p| p.split("/")[2..-1].join("/")}
  end

  def _disk_path_by_name(name)
    dn = "/dev/disk/by-path"
    basepath = Dir["#{dn}/**?"].find{|f| /\/#{name}$/.match(File.readlink(f))}
    basepath.split("/")[2..-1].join("/") if basepath
  end

  def physical_data_storage_devices
    @blocks ||= []
    return @blocks unless @blocks.empty?

    @logger.debug("Trying to get list of physical devices")
    raise "Path /sys/block does not exist" unless File.exists?("/sys/block")

    Dir["/sys/block/*"].each do |block_device_dir|
      basename_dir = File.basename(block_device_dir)
      # Entries in /sys/block for cciss look like cciss!c0d1 while
      # the entries in /dev look like /dev/cciss/c0d1. udevadm uses
      # the entry in /dev so we need to replace the ! to get a valid
      # device name.
      devname = basename_dir.gsub(/!/, '/')

      @logger.debug("Getting udev properties for device: #{devname}")
      properties = `udevadm info --query=property --export --name=#{devname}`.inject({}) do |result, raw_propety|
       key, value = raw_propety.split(/\=/)
       result.update(key.strip => value.strip.chomp("'").reverse.chomp("'").reverse)
      end
      @logger.debug("Device #{devname} udev properties: #{properties.inspect}")

      @logger.debug("Trying to find out if device #{devname} is removable or not")
      if File.exists?("/sys/block/#{basename_dir}/removable")
        removable = File.open("/sys/block/#{basename_dir}/removable"){ |f| f.read_nonblock(1024).strip }
      end
      @logger.debug("Device #{devname} removable parameter: #{removable.inspect}")

      if STORAGE_CODES.include?(properties['MAJOR'].to_i)
        @logger.debug("Device #{devname} seems to be appropriate")
        # Exclude LVM volumes (in CentOS - 253, in Ubuntu - 252) using additional check
        unless properties['DEVPATH'].include?('virtual')
          @blocks << {
            :name => basename_dir,
            :disk => _disk_path_by_name(devname) || devname,
            :extra => _disk_id_by_name(devname) || [],
            :removable => removable,
          }
        end
      end
    end
    @logger.debug("Final list of physical devices is: #{@blocks.inspect}")
    @blocks
  end

  def _is_virtualbox
    @os[:dmi][:system][:product_name] == "VirtualBox" rescue false
  end

  def _is_virtual
    _is_virtualbox or @os[:virtualization][:role] == "guest" rescue false
  end

  def _manufacturer
    if _is_virtualbox
      @os[:dmi][:system][:product_name] rescue nil
    elsif _is_virtual
      @os[:virtualization][:system].upcase.strip rescue nil
    else
      @os[:dmi][:system][:manufacturer].strip rescue nil
    end
  end

  def _product_name
    unless _is_virtual
      @os[:dmi][:system][:product_name].strip rescue nil
    end
  end

  def _serial
    @os[:dmi][:system][:serial_number].strip rescue nil
  end

  # Returns unique identifier of machine
  # * for kvm virtual node will contain virsh UUID
  # * for physical HW that would be unique chassis id (from BIOS settings)
  # * for other hypervizors - not tested
  def uuid
    node_uuid = @os.fetch(:dmi, {}).fetch(:system, {}).fetch(:uuid, nil)
    node_uuid && node_uuid.strip
  end

  def _system_info
    {
      :manufacturer => _manufacturer,
      :serial => _serial,
      :uuid => uuid,
      :product => _product_name,
      :family => (@os[:dmi][:system][:family].strip rescue nil),
      :version => (@os[:dmi][:system][:version].strip rescue nil),
      :fqdn => (@os[:fqdn].strip rescue @os[:hostname].strip rescue nil),
    }.delete_if { |key, value| value.nil? or value.empty? or value == "Not Specified" }
  end

  def _size(size, unit)
    case unit
      when /^kb$/i
        size * 1024
      when /^mb$/i
        size * 1048576
      when /^gb$/i
        size * 1073741824
    end
  end

  def _dmi_memory
    dmi = `/usr/sbin/dmidecode`
    info = {:devices => [], :total => 0, :maximum_capacity => 0, :slots => 0}
    return nil if $?.to_i != 0
    dmi.split(/\n\n/).each do |group|
      if /^Physical Memory Array$/.match(group)
        if /^\s*Maximum Capacity:\s+(\d+)\s+(mb|gb|kb)/i.match(group)
          info[:maximum_capacity] += _size($1.to_i, $2)
        end
        if /^\s*Number Of Devices:\s+(\d+)/i.match(group)
          info[:slots] += $1.to_i
        end
      elsif /^Memory Device$/.match(group)
        device_info = {}
        if /^\s*Size:\s+(\d+)\s+(mb|gb|kb)/i.match(group)
          size = _size($1.to_i, $2)
          device_info[:size] = size
          info[:total] += size
        else
          next
        end
        if /^\s*Speed:\s+(\d+)\s+MHz/i.match(group)
          device_info[:frequency] = $1.to_i
        end
        if /^\s*Type:\s+(.*?)$/i.match(group)
          device_info[:type] = $1
        end
        #if /^\s*Locator:\s+(.*?)$/i.match(group)
        #  device_info[:locator] = $1
        #end
        info[:devices].push(device_info)
      end
    end
    if info[:total] == 0
      nil
    else
      info
    end
  end

  def _ohai_memory
    info = {}
    size = @os['memory']['total'].gsub(/(kb|mb|gb)$/i, "").to_i rescue (return nil)
    info[:total] = _size(size, $1)
    info
  end



  def _master_ip_and_mac
    @os[:network][:interfaces].each do |_, intinfo|
      intinfo[:addresses].each do |k, v|
        # Here we need to check family because IPAddr.new with bad
        # data works very slow on some environments
        # https://bugs.launchpad.net/fuel/+bug/1284571
        if v[:family] == 'inet' && !(IPAddr.new(k) rescue nil).nil?
          net = IPAddr.new("#{k}/#{v[:netmask]}")
          if net.include? @api_ip
            mac = intinfo[:addresses].find { |_, info| info[:family] == 'lladdr' }[0]
            return {:ip => k, :mac => mac}
          end
        end
      end
    end
    {}
  end

  def _data
    res = {
      :mac => (@os[:macaddress] rescue nil),
      :ip  => (@os[:ipaddress] rescue nil),
      :os_platform => (@os[:platform] rescue nil),
    }
    begin
      detailed_data = _detailed
      master_data=_master_ip_and_mac
      res.merge!({
        :ip  => (( master_data[:ip] or @os[:ipaddress]) rescue nil),
        :mac  => (( master_data[:mac] or @os[:macaddress]) rescue nil),
        :manufacturer => _manufacturer,
        :platform_name => _product_name,
        :meta => detailed_data
      })
    rescue Exception => e
      @logger.error("Error '#{e.message}' in metadata calculation: #{e.backtrace}")
    end

    res[:status] = @node_state if @node_state
    res[:is_agent] = true
    res[:agent_checksum] = createsig(res)
    res
  end

  def update_state
    @node_state = nil
    if File.exist?("/etc/nailgun_systemtype")
      fl = File.open("/etc/nailgun_systemtype", "r")
      system_type = fl.readline.rstrip
      @node_state = "discover" if system_type == "bootstrap"
    end
  end
end

def write_data_to_file(logger, filename, data)
  if File.exist?(filename)
    File.open(filename, 'r') do |fo|
      text = fo.read
    end
  else
    text = ''
  end

  if text != data
    begin
      File.open(filename, 'w') do |fo|
        fo.write(data)
      end
      logger.info("Wrote data to file '#{filename}'. Data: #{data}")
    rescue Exception => e
      logger.warning("Can't write data to file '#{filename}'. Reason: #{e.message}")
    end
  else
    logger.info("File '#{filename}' is up to date.")
  end
end

logger = Logger.new(STDOUT)
logger.level = Logger::DEBUG

if File.exist?('/var/run/nodiscover')
  logger.info("Discover prevented by /var/run/nodiscover presence.")
  exit 1
end

begin
  logger.info("Trying to load agent config #{AGENT_CONFIG}")
  url = YAML.load_file(AGENT_CONFIG)['url']
  logger.info("Obtained service url from config file: '#{url}'")
rescue Exception => e
  logger.info("Could not get url from configuration file: #{e.message}, trying other ways..")
end

sleep_time = rand(30)
logger.debug("Sleep for #{sleep_time} seconds before sending request")
sleep(sleep_time)

agent = NodeAgent.new(logger, url)
agent.update_state

begin
  unless File.exist?('/etc/nailgun_uid')
    resp = agent.post
    # We must not log 409 as error, after node is provisioned there will be no
    # /etc/nailgun_uid, it will be created after put request
    if [409, 403].include? resp.status
      resp = agent.put
    end
  else
    resp = agent.put
    # Handle case when node was removed, but nailgun_uid exist
    if resp.status == 400
      resp = agent.post
    end
  end
  unless [201, 200].include? resp.status
    logger.error resp.body
    exit 1
  end
  new_id = JSON.parse(resp.body)['id']
  mc_config = McollectiveConfig.new(logger)
  mc_config.replace_identity(new_id)
  write_data_to_file(logger, '/etc/nailgun_uid', new_id.to_s)
rescue => ex
  # NOTE(mihgen): There is no need to retry - cron will do it for us
  logger.error "#{ex.message}\n#{ex.backtrace}"
end
