#!/usr/bin/perl -w

# Resource Agent for managing PVE VMs (openvz and qemu-kvm)
#
# License: GNU Affero General Public License (AGPL3)
# Copyright (C) 2011 Proxmox Server Solutions GmbH

use strict;
use File::Basename;
use File::Copy;
use PVE::Tools;
use PVE::ProcFSTools;
use PVE::Cluster;
use PVE::INotify;
use PVE::RPCEnvironment;
use PVE::OpenVZ;
use PVE::API2::OpenVZ;
use PVE::QemuServer;
use PVE::API2::Qemu;

# Exit codes returned by this resource agent.
use constant OCF_SUCCESS => 0;
use constant OCF_ERR_GENERIC => 1;
use constant OCF_ERR_ARGS => 2;
use constant OCF_ERR_UNIMPLEMENTED => 3;
use constant OCF_ERR_PERM => 4;
use constant OCF_ERR_INSTALLED => 5;
use constant OCF_ERR_CONFIGURED => 6;
use constant OCF_NOT_RUNNING => 7;
use constant OCF_RUNNING_MASTER => 8;
use constant OCF_FAILED_MASTER => 9;

# Run external commands with a fixed search path, independent of the caller.
$ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';

# Name passed to clulog via '-m' by ocf_log().
my $ocf_ressource_type = 'pvevm';

# Maps the log level names accepted by ocf_log() to the numeric value
# passed to clulog via '-s'.
my $prio_hash = {
    err => 3,
    note => 5,
    info => 6,
    debug => 7,
};

# Uncaught die(): log the message and terminate with OCF_ERR_GENERIC.
$SIG{__DIE__} = sub {
    die @_ if $^S; # skip if inside eval
    ocf_log('err', @_);
    # Exit with the constant instead of going through $!: ocf_log() runs an
    # external command, and any system call failing on the way overwrites
    # errno, which would turn the exit status into an arbitrary value.
    exit(OCF_ERR_GENERIC);
};

# All actions require root privileges.
if ($> != 0) {
    print STDERR "Cannot control VMs. as non-root user.\n";
    exit(OCF_ERR_PERM);
}

PVE::INotify::inotify_init();

# Set up an RPC environment of type 'ha'; API calls are made as root@pam.
my $rpcenv = PVE::RPCEnvironment->init('ha');

$rpcenv->init_request();
$rpcenv->set_language($ENV{LANG});
$rpcenv->set_user('root@pam'); 

# Name of the local cluster node.
my $nodename = PVE::INotify::nodename();

# NOTE(review): the ssh settings below are not referenced in the visible
# part of this file - confirm whether they are still needed.
my @ssh_opts = ('-o', 'BatchMode=yes');
my @ssh_cmd = ('ssh', @ssh_opts);

# Log a message.
#
# The message is printed to STDOUT as "<level>: <msg>" and additionally
# forwarded to clulog. $level is looked up in $prio_hash; a level that is
# not listed there is logged with the 'note' priority. Errors raised while
# running clulog are ignored.
sub ocf_log {
    my ($level, $msg) = @_;

    chomp $msg;    
    print "$level: $msg\n";

    my $prio = $prio_hash->{$level};
    if (!defined($prio)) {
	$prio = $prio_hash->{note};
    }

    # best effort only - a failing clulog must not abort the agent
    eval {
	PVE::Tools::run_command(
	    ['clulog', '-m', $ocf_ressource_type, '-s', $prio, $msg]);
    };
}

# Return the timeout configured for the current action.
#
# The value is taken from OCF_RESKEY_RGMANAGER_meta_timeout or, if that is
# not set, from OCF_RESKEY_CRM_meta_timeout. The default of 60 is returned
# when neither variable is set, or when the value is not a positive decimal
# number (so strings like "abc" or "10m" no longer trigger numeric warnings
# or leak through to the callers).
#
# NOTE(review): the value is used as-is; confirm that both variables carry
# the same unit.
sub get_timeout {
    my $default_timeout = 60;

    # first non-empty, non-zero setting wins
    my $tout = $ENV{OCF_RESKEY_RGMANAGER_meta_timeout}
	|| $ENV{OCF_RESKEY_CRM_meta_timeout};

    return $default_timeout if !defined($tout);

    # validate before comparing numerically
    return $default_timeout if $tout !~ m/^\d+(?:\.\d+)?\z/;

    return $default_timeout if $tout <= 0;

    return $tout;
}

# Query whether the guest described by $status is running and store the
# result in $status->{running}.
#
# $status must provide 'type' ('qemu' or 'openvz') and 'vmid'; any other
# type raises an exception. $verbose is accepted but not used.
sub check_running {
    my ($status, $verbose) = @_;

    my $vmid = $status->{vmid};

    return $status->{running} = PVE::QemuServer::check_running($vmid, 1)
	if $status->{type} eq 'qemu';

    return $status->{running} = PVE::OpenVZ::check_running($vmid)
	if $status->{type} eq 'openvz';

    die "got strange VM type '$status->{type}'\n";
}

# Validate the resource configuration and collect the guest status.
#
# Reads the VMID from OCF_RESKEY_vmid, looks it up in the cluster wide VM
# list and determines whether the guest is currently running.
#
# Returns a hash reference with the keys 'vmid', 'type', 'node', 'name'
# ("VM <vmid>" for qemu, "CT <vmid>" otherwise) and 'running'.
#
# Any failure is logged and terminates the process with OCF_ERR_ARGS.
sub validate_all {
    my $status = {};

    eval {

	my $vmid = $ENV{OCF_RESKEY_vmid};
	die "no VMID specified\n" if !defined($vmid);
	# use \z instead of $ - the latter also accepts a trailing newline
	die "got invalid VMID '$vmid'\n" if $vmid !~ m/^[1-9]\d*\z/;

	my $vmlist = PVE::Cluster::get_vmlist();
	die "got empty cluster VM list\n" if !$vmlist || !$vmlist->{ids};
	my $data = $vmlist->{ids}->{$vmid};
	die "VM $vmid does not exist\n" if !$data;

	$status->{vmid} = $vmid;
	$status->{type} = $data->{type};
	$status->{node} = $data->{node};

	if ($status->{type} eq 'qemu') {
	    $status->{name} = "VM $vmid";
	} else {
	    $status->{name} = "CT $vmid";
	}

	# sets $status->{running}, dies on unknown guest types
	check_running($status);
    };
    if (my $err = $@) {
	ocf_log('err', $err);
	exit(OCF_ERR_ARGS);
    }

    return $status;
}

# Block until the task identified by $upid has finished.
#
# The UPID is decoded to obtain the pid and start time of the worker
# process, which is then polled once per second. A debug message is logged
# for every poll that still finds the process alive.
sub upid_wait {
    my ($upid) = @_;

    my $task = PVE::Tools::upid_decode($upid);

    for (;;) {
	sleep(1);

	my $active = PVE::ProcFSTools::check_process_running(
	    $task->{pid}, $task->{pstart});
	last if !$active;

	ocf_log('debug', "Task still active, waiting");
    }
}

# Copy the per-container scripts of $vmid from the directory holding
# $oldconfig to the directory holding $newconfig.
#
# For each extension in PVE::OpenVZ::SCRIPT_EXT the file "<vmid>.<ext>" is
# copied if it exists as a plain file in the old directory.
#
# Returns two array references: the list of destination files and the list
# of source files (in that order). If any copy fails, all destination files
# recorded so far are removed and the error is re-thrown.
sub copy_scripts {
    my ($vmid, $oldconfig, $newconfig) = @_;

    my $srcdir = dirname($oldconfig);
    my $dstdir = dirname($newconfig);

    my (@created, @sources);

    eval {
	for my $ext (PVE::OpenVZ::SCRIPT_EXT) {
	    my $name = "${vmid}.$ext";
	    my ($src, $dst) = ("$srcdir/$name", "$dstdir/$name");
	    -f $src or next;

	    # record both paths before copying, so the cleanup below also
	    # covers the file whose copy failed
	    push @sources, $src;
	    push @created, $dst;

	    if (!copy($src, $dst)) {
		die "copy '$src' to '$dst' failed - $!\n";
	    }
	}
    };
    if (my $err = $@) {
	unlink($_) foreach @created;
	die $err;
    }

    return (\@created, \@sources);
}

# Command dispatch: the first argument selects the action to execute.
my $cmd = shift || '';

# Only 'migrate' takes a second argument (the target node). The variable is
# declared unconditionally, because 'my $x = ... if COND' has undefined
# behaviour in Perl.
my $migratetarget;
$migratetarget = shift if $cmd eq 'migrate';

die "too many arguments\n" if scalar (@ARGV) != 0;

if ($cmd eq 'start') {
    my $status = validate_all();
    if ($status->{running}) {
	ocf_log('info', "$status->{name} is already running");
	exit(OCF_SUCCESS);
    }

    # The guest is assigned to another node: take over its configuration
    # before starting it locally.
    if ($status->{node} ne $nodename) {
	ocf_log('info', "Move config for $status->{name} to local node");
	my ($oldconfig, $newconfig, $oldfiles, $newfiles);
	if ($status->{type} eq 'qemu') {
	    $oldconfig = PVE::QemuServer::config_file($status->{vmid}, $status->{node});
	    $newconfig = PVE::QemuServer::config_file($status->{vmid}, $nodename);
	} else {
	    $oldconfig = PVE::OpenVZ::config_file($status->{vmid}, $status->{node});
	    $newconfig = PVE::OpenVZ::config_file($status->{vmid}, $nodename);

	    # containers may have additional scripts next to the config file
	    eval { ($newfiles, $oldfiles) = copy_scripts($status->{vmid}, $oldconfig, $newconfig); };
	    if (my $err = $@) {
		ocf_log('err', "unable to move config scripts: $err");
		exit(OCF_ERR_GENERIC);
	    } 
	}
	if (!rename($oldconfig, $newconfig)) {
	    ocf_log('err', "unable to move config file from '$oldconfig' to '$newconfig' - $!");
	    # roll back the script copies
	    if ($newfiles) {
		foreach my $fn (@$newfiles) {
		    unlink($fn);
		}
	    }
	    exit(OCF_ERR_GENERIC);
	}
	# the config was moved - remove the script originals
	if ($oldfiles) {
	    foreach my $fn (@$oldfiles) {
		unlink($fn);
	    }
	}
    }

    my $upid;
    
    if ($status->{type} eq 'qemu') {
	$upid = PVE::API2::Qemu->vm_start({node => $nodename, vmid => $status->{vmid}});
    } else {
	$upid = PVE::API2::OpenVZ->vm_start({node => $nodename, vmid => $status->{vmid}});
    }

    upid_wait($upid);

    check_running($status);

    if (!$status->{running}) {
	# do not fail silently
	ocf_log('err', "Start of $status->{name} has failed");
	exit(OCF_ERR_GENERIC);
    }

    # Optionally wait until the user supplied status program succeeds,
    # limited by the action timeout.
    if (my $testprog = $ENV{OCF_RESKEY_status_program}) {

	my $timeout = get_timeout();

	my $wait_func = sub {
	    while (system($testprog) != 0) { sleep(3); }
	};

	eval { PVE::Tools::run_with_timeout($timeout, $wait_func); };
	if (my $err = $@) {
	    ocf_log('err', "Start of $status->{name} has failed");
	    ocf_log('err', "error while waiting for '$testprog' - $err");
	    exit(OCF_ERR_GENERIC);
	}
    }

    exit(OCF_SUCCESS);

} elsif($cmd eq 'stop') {
    my $status = validate_all();

    if (!$status->{running}) {
	ocf_log('info', "$status->{name} is already stopped");
	exit(OCF_SUCCESS);
    }

    my $timeout = get_timeout();

    my $upid;

    # request a shutdown with the 'forceStop' flag set
    my $param = {
	node => $nodename, 
	vmid => $status->{vmid}, 
	timeout => $timeout,
	forceStop => 1,
    };

    if ($status->{type} eq 'qemu') {
	$upid = PVE::API2::Qemu->vm_shutdown($param);
    } else {
	$upid = PVE::API2::OpenVZ->vm_shutdown($param);
    }

    upid_wait($upid);

    check_running($status);

    exit($status->{running} ? OCF_ERR_GENERIC : OCF_SUCCESS);

} elsif($cmd eq 'recover' || $cmd eq 'restart' || $cmd eq 'reload') {

    # nothing to do for these actions
    exit(OCF_SUCCESS);

} elsif($cmd eq 'status' || $cmd eq 'monitor') {

    my $status = validate_all();

    if (!$status->{running}) {
	ocf_log('debug', "$status->{name} is not running");
	exit(OCF_NOT_RUNNING);
    }

    ocf_log('debug', "$status->{name} is running");

    my $testprog = $ENV{OCF_RESKEY_status_program};
    my $checklevel = $ENV{OCF_CHECK_LEVEL};

    # the status program is only executed for check level 10 and above
    if ($testprog && $checklevel && $checklevel >= 10) {
	if (system($testprog) != 0) {
	    exit(OCF_NOT_RUNNING);
	}
    }

    exit(OCF_SUCCESS);

} elsif($cmd eq 'migrate') {
    my $status = validate_all();
    if (!$status->{running}) {
	ocf_log('err', "$status->{name} is not running");
	exit(OCF_ERR_GENERIC);
    }

    if (!$migratetarget) {
	ocf_log('err', "No target specified");
	exit(OCF_ERR_ARGS);
    }

    my $upid;
    my $params = {
	node => $nodename, 
	vmid => $status->{vmid},
	target => $migratetarget,
	online => 1,
    };

    my $oldconfig;
    if ($status->{type} eq 'qemu') {
	$oldconfig = PVE::QemuServer::config_file($status->{vmid}, $status->{node});
	$upid = PVE::API2::Qemu->migrate_vm($params);
    } else {
	$oldconfig = PVE::OpenVZ::config_file($status->{vmid}, $status->{node});
	$upid = PVE::API2::OpenVZ->migrate_vm($params);
    }

    upid_wait($upid);

    # something went wrong if old config file is still there
    exit((-f $oldconfig) ? OCF_ERR_GENERIC : OCF_SUCCESS);

} elsif($cmd eq 'reconfig') {
    # Reconfigure a running VM
    validate_all();

    # we do nothing here

} elsif($cmd eq 'meta-data') {
    # print the resource agent description stored after __DATA__
    while(<DATA>) {
	print;
    }
} elsif($cmd eq 'validate-all') {
    validate_all();
} else {
    die "usage: $0 {start|stop|recover|restart|reload|status|monitor|migrate <target>|reconfig|meta-data|validate-all}\n";
}
			
exit(OCF_SUCCESS);

__DATA__
<?xml version="1.0"?>
<resource-agent version="rgmanager 2.0" name="pvevm">
    <version>1.0</version>

    <longdesc lang="en">
	Defines a PVE Virtual Machine
    </longdesc>
    <shortdesc lang="en">
        Defines a PVE Virtual Machine
    </shortdesc>

    <parameters>
        <parameter name="vmid" primary="1">
            <longdesc lang="en">
                This is the VMID of the virtual machine.
            </longdesc>
            <shortdesc lang="en">
                VMID
            </shortdesc>
            <content type="string"/>
        </parameter>
    
        <parameter name="domain" reconfig="1">
            <longdesc lang="en">
                Failover domains define lists of cluster members
                to try in the event that the host of the virtual machine
		fails.
            </longdesc>
            <shortdesc lang="en">
                Cluster failover Domain
            </shortdesc>
            <content type="string"/>
        </parameter>

        <parameter name="autostart" reconfig="1">
            <longdesc lang="en">
	    	If set to yes, this resource group will automatically be started
		after the cluster forms a quorum.  If set to no, this virtual
		machine will start in the 'disabled' state after the cluster
		forms a quorum.
            </longdesc>
            <shortdesc lang="en">
	    	Automatic start after quorum formation
            </shortdesc>
            <content type="boolean" default="1"/>
        </parameter>

        <parameter name="exclusive" reconfig="1">
            <longdesc lang="en">
	    	If set, this resource group will only relocate to
		nodes which have no other resource groups running in the
		event of a failure.  If no empty nodes are available,
		this resource group will not be restarted after a failure.
		Additionally, resource groups will not automatically
		relocate to the node running this resource group.  This
		option can be overridden by manual start and/or relocate
		operations.
            </longdesc>
            <shortdesc lang="en">
	        Exclusive resource group
            </shortdesc>
            <content type="boolean" default="0"/>
        </parameter>

        <parameter name="recovery" reconfig="1">
            <longdesc lang="en">
	        This currently has three possible options: "restart" tries
		to restart this virtual machine locally before
		attempting to relocate (default); "relocate" does not bother
		trying to restart the VM locally; "disable" disables
		the VM if it fails.
            </longdesc>
            <shortdesc lang="en">
	    	Failure recovery policy
            </shortdesc>
            <content type="string"/>
        </parameter>

	<parameter name="migrate">
	    <longdesc lang="en">
	    	Migration type (live or pause, default = live).
	    </longdesc>
	    <shortdesc lang="en">
	    	Migration type (live or pause, default = live).
	    </shortdesc>
            <content type="string" default="live"/>
        </parameter>

        <parameter name="depend">
            <longdesc lang="en">
		Service dependency; will not start without the specified
		service running.
            </longdesc>
            <shortdesc lang="en">
		Top-level service this depends on, in service:name format.
            </shortdesc>
            <content type="string"/>
        </parameter>

        <parameter name="depend_mode">
            <longdesc lang="en">
		Service dependency mode.
		hard - This service is stopped/started if its dependency
		       is stopped/started
		soft - This service only depends on the other service for
		       initial startup.  If the other service stops, this
		       service is not stopped.
            </longdesc>
            <shortdesc lang="en">
	    	Service dependency mode (soft or hard).
            </shortdesc>
            <content type="string" default="hard"/>
        </parameter>

        <parameter name="max_restarts" reconfig="1">
            <longdesc lang="en">
	    	Maximum restarts for this service.
            </longdesc>
            <shortdesc lang="en">
	    	Maximum restarts for this service.
            </shortdesc>
            <content type="string" default="0"/>
        </parameter>

        <parameter name="restart_expire_time" reconfig="1">
            <longdesc lang="en">
	    	Restart expiration time.  A restart is forgotten
		after this time.  When combined with the max_restarts
		option, this lets administrators specify a threshold
		for when to fail over services.  If max_restarts
		is exceeded in this given expiration time, the service
		is relocated instead of restarted again.
            </longdesc>
            <shortdesc lang="en">
	    	Restart expiration time; amount of time before a restart
		is forgotten.
            </shortdesc>
            <content type="string" default="0"/>
        </parameter>

        <parameter name="status_program" reconfig="1">
            <longdesc lang="en">
	    	Ordinarily, only the presence/health of a virtual machine
		is checked.  If specified, the status_program value is
		executed during a depth 10 check.  The intent of this 
		program is to ascertain the status of critical services
		within a virtual machine.
            </longdesc>
            <shortdesc lang="en">
	    	Additional status check program
            </shortdesc>
            <content type="string" default=""/>
        </parameter>
    </parameters>

    <actions>
        <action name="start" timeout="75"/>
        <action name="stop" timeout="75"/>
	
        <action name="status" timeout="10" interval="30"/>
        <action name="monitor" timeout="10" interval="30"/>

	<!-- depth 10 calls the status_program -->
        <action name="status" depth="10" timeout="20" interval="60"/>
        <action name="monitor" depth="10" timeout="20" interval="60"/>

	<!-- reconfigure - reconfigure with new OCF parameters.
	     NOT OCF COMPATIBLE AT ALL -->
	<action name="reconfig" timeout="10"/>

	<action name="migrate" timeout="10m"/>

        <action name="meta-data" timeout="5"/>
        <action name="validate-all" timeout="5"/>

    </actions>
    
    <special tag="rgmanager">
    	<!-- Destroy_on_delete / init_on_add are currently only
	     supported for migratory resources (no children
	     and the 'migrate' action; see above).  Do not try this
	     with normal services -->
        <attributes maxinstances="1" destroy_on_delete="0" init_on_add="0"/>
    </special>
</resource-agent>

