Commit ca225fc1 by wangzh21

Add "Report device data to cyborg"

1. Load drivers with stevedore
2. Now update_usage just do the discover, add report data when
   conductor api ready.

Change-Id: Ia813c5a8dd8f29ce689204e52b6e1f691633f5fc
parent 34eac481
......@@ -18,31 +18,20 @@ Track resources like FPGA GPU and QAT for a host. Provides the
conductor with useful information about availability through the accelerator
model.
"""
from oslo_log import log as logging
from oslo_messaging.rpc.client import RemoteError
from oslo_utils import uuidutils
from stevedore import driver
from stevedore.extension import ExtensionManager
from cyborg.accelerator.drivers.fpga.base import FPGADriver
from cyborg.common import exception
from cyborg.common import utils
from cyborg import objects
from cyborg.conf import CONF
LOG = logging.getLogger(__name__)
AGENT_RESOURCE_SEMAPHORE = "agent_resources"
DEPLOYABLE_VERSION = "1.0"
# need to change the driver field name
DEPLOYABLE_HOST_MAPS = {"assignable": "assignable",
"address": "devices",
"board": "product_id",
"type": "function",
"vendor": "vendor_id",
"name": "name",
"interface_type": "interface_type"
}
class ResourceTracker(object):
"""Agent helper class for keeping track of resource usage as instances
......@@ -50,149 +39,35 @@ class ResourceTracker(object):
"""
def __init__(self, host, cond_api):
# FIXME (Shaohe) local cache for Accelerator.
# Will fix it in next release.
self.fpgas = None
self.host = host
self.conductor_api = cond_api
self.fpga_driver = FPGADriver()
@utils.synchronized(AGENT_RESOURCE_SEMAPHORE)
def claim(self, context):
pass
def _fpga_compare_and_update(self, host_dev, acclerator):
need_updated = False
for k, v in DEPLOYABLE_HOST_MAPS.items():
if acclerator[k] != host_dev[v]:
need_updated = True
acclerator[k] = host_dev[v]
return need_updated
self.acc_drivers = []
self._initialize_drivers()
def _gen_accelerator_for_deployable(
self, context, name, vendor, productor, desc="", dev_type="pf",
acc_type="FPGA", acc_cap="", remotable=0):
def _initialize_drivers(self, enabled_drivers=[]):
"""
The type of the accelerator device, e.g GPU, FPGA, ...
acc_type defines the usage of the accelerator, e.g Crypto
acc_capability defines the specific capability, e.g AES
Load accelerator drivers.
:return: [nvidia_gpu_driver_obj, intel_fpga_driver_obj]
"""
db_acc = {
'deleted': False,
'uuid': uuidutils.generate_uuid(),
'name': name,
'description': desc,
'project_id': context.project_id,
'user_id': context.user_id,
'device_type': dev_type,
'acc_type': acc_type,
'acc_capability': acc_cap,
'vendor_id': vendor,
'product_id': productor,
'remotable': remotable
}
acc = objects.Accelerator(context, **db_acc)
acc = self.conductor_api.accelerator_create(context, acc)
return acc
def _gen_deployable_from_host_dev(self, host_dev, acc_id,
parent_uuid=None, root_uuid=None):
dep = {}
for k, v in DEPLOYABLE_HOST_MAPS.items():
dep[k] = host_dev[v]
dep["host"] = self.host
dep["version"] = DEPLOYABLE_VERSION
dep["availability"] = "free"
dep["uuid"] = uuidutils.generate_uuid()
dep["parent_uuid"] = parent_uuid
dep["root_uuid"] = root_uuid
dep["accelerator_id"] = acc_id
return dep
acc_drivers = []
if not enabled_drivers:
enabled_drivers = CONF.agent.enabled_drivers
valid_drivers = ExtensionManager(
namespace='cyborg.accelerator.driver').names()
for d in enabled_drivers:
if d not in valid_drivers:
raise exception.InvalidDriver(name=d)
acc_driver = driver.DriverManager(
namespace='cyborg.accelerator.driver', name=d,
invoke_on_load=True).driver
acc_drivers.append(acc_driver)
self.acc_drivers = acc_drivers
@utils.synchronized(AGENT_RESOURCE_SEMAPHORE)
def update_usage(self, context):
"""Update the resource usage and stats after a change in an
instance
"""Update the resource usage periodically.
"""
def create_deployable(fpgas, bdf, acc_id, parent_uuid=None):
fpga = fpgas[bdf]
dep = self._gen_deployable_from_host_dev(fpga, acc_id)
# if parent_uuid:
dep["parent_uuid"] = parent_uuid
obj_dep = objects.Deployable(context, **dep)
new_dep = self.conductor_api.deployable_create(context, obj_dep)
return new_dep
# NOTE(Shaohe Feng) need more agreement on how to keep consistency.
fpgas = self._get_fpga_devices()
bdfs = set(fpgas.keys())
deployables = self.conductor_api.deployable_get_by_host(
context, self.host)
# NOTE(Shaohe Feng) when no "address" in deployable?
accls = dict([(v["address"], v) for v in deployables])
accl_bdfs = set(accls.keys())
# Firstly update
for mutual in accl_bdfs & bdfs:
accl = accls[mutual]
if self._fpga_compare_and_update(fpgas[mutual], accl):
try:
self.conductor_api.deployable_update(context, accl)
except RemoteError as e:
LOG.error(e)
# Add
new = bdfs - accl_bdfs
new_pf = set([n for n in new if fpgas[n]["function"] == "pf"])
for n in new_pf:
fpga = fpgas[n]
acc = self._gen_accelerator_for_deployable(
context, fpga["name"], fpga["vendor_id"], fpga["product_id"],
"FPGA device on %s" % self.host, "pf", "FPGA")
new_dep = create_deployable(fpgas, n, acc.id)
accls[n] = new_dep
sub_vf = set()
if "regions" in n:
sub_vf = set([sub["devices"] for sub in fpgas[n]["regions"]])
for vf in sub_vf & new:
fpga = fpgas[n]
acc = self._gen_accelerator_for_deployable(
context, fpga["name"], fpga["vendor_id"],
fpga["product_id"], "FPGA device on %s" % self.host,
"vf", "FPGA")
new_dep = create_deployable(fpgas, vf, acc.id, new_dep["uuid"])
accls[vf] = new_dep
new.remove(vf)
for n in new - new_pf:
p_bdf = fpgas[n]["parent_devices"]
p_accl = accls[p_bdf]
p_uuid = p_accl["uuid"]
fpga = fpgas[n]
acc = self._gen_accelerator_for_deployable(
context, fpga["name"], fpga["vendor_id"], fpga["product_id"],
"FPGA device on %s" % self.host, "pf", "FPGA")
new_dep = create_deployable(fpgas, n, acc.id, p_uuid)
# Delete
for obsolete in accl_bdfs - bdfs:
try:
self.conductor_api.deployable_delete(context, accls[obsolete])
except RemoteError as e:
LOG.error(e)
del accls[obsolete]
def _get_fpga_devices(self):
def form_dict(devices, fpgas):
for v in devices:
fpgas[v["devices"]] = v
if "regions" in v:
form_dict(v["regions"], fpgas)
fpgas = {}
vendors = self.fpga_driver.discover_vendors()
for v in vendors:
driver = self.fpga_driver.create(v)
form_dict(driver.discover(), fpgas)
return fpgas
acc_list = []
for acc_driver in self.acc_drivers:
acc_list.extend(acc_driver.discover())
# Call conductor_api here to diff and report acc data.
......@@ -327,3 +327,7 @@ class ImageNotFound(NotFound):
class ImageBadRequest(Invalid):
msg_fmt = _("Request of image %(image_id)s got BadRequest response: "
"%(response)s")
class InvalidDriver(Invalid):
_msg_fmt = _("Found an invalid driver: %(name)s")
......@@ -15,6 +15,7 @@
from oslo_config import cfg
from cyborg.conf import agent
from cyborg.conf import api
from cyborg.conf import database
from cyborg.conf import default
......@@ -25,6 +26,7 @@ from cyborg.conf import keystone
CONF = cfg.CONF
api.register_opts(CONF)
agent.register_opts(CONF)
database.register_opts(CONF)
default.register_opts(CONF)
default.register_placement_opts(CONF)
......
# Copyright 2018 Beijing Lenovo Software Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
from oslo_config import cfg
from cyborg.common.i18n import _
opts = [
cfg.ListOpt('enabled_drivers',
default=[],
help=_('The accelerator drivers enabled on this agent. Such '
'as intel_fpga_driver, nvidia_gpu_driver, etc.')),
]
opt_group = cfg.OptGroup(name='agent',
title='Options for the cyborg-agent service')
AGENT_OPTS = (opts)
def register_opts(conf):
conf.register_group(opt_group)
conf.register_opts(opts, group=opt_group)
def list_opts():
return {
opt_group: AGENT_OPTS
}
......@@ -15,18 +15,11 @@
"""Cyborg agent resource_tracker test cases."""
import os
import fixtures
from cyborg.accelerator.drivers.fpga import utils
from cyborg.accelerator.drivers.fpga.intel import sysinfo
from cyborg.agent.resource_tracker import ResourceTracker
from cyborg.common import exception
from cyborg.conductor import rpcapi as cond_api
from cyborg.conf import CONF
from cyborg.tests import base
from cyborg.tests.unit.accelerator.drivers.fpga.intel import prepare_test_data
class TestResourceTracker(base.TestCase):
......@@ -34,22 +27,10 @@ class TestResourceTracker(base.TestCase):
def setUp(self):
super(TestResourceTracker, self).setUp()
self.syspath = sysinfo.SYS_FPGA
sysinfo.SYS_FPGA = "/sys/class/fpga"
tmp_sys_dir = self.useFixture(fixtures.TempDir())
prepare_test_data.create_fake_sysfs(tmp_sys_dir.path)
sysinfo.SYS_FPGA = os.path.join(
tmp_sys_dir.path, sysinfo.SYS_FPGA.split("/", 1)[-1])
utils.SYS_FPGA_PATH = sysinfo.SYS_FPGA
self.host = CONF.host
self.cond_api = cond_api.ConductorAPI()
self.rt = ResourceTracker(self.host, self.cond_api)
def tearDown(self):
super(TestResourceTracker, self).tearDown()
sysinfo.SYS_FPGA = self.syspath
utils.SYS_FPGA_PATH = self.syspath
def test_update_usage(self):
"""Update the resource usage and stats after a change in an
instance
......@@ -58,50 +39,13 @@ class TestResourceTracker(base.TestCase):
# has stored into DB by conductor correctly?
pass
def test_get_fpga_devices(self):
expect = {
'0000:5e:00.0': {
'function': 'pf',
'assignable': False,
'pr_num': '1',
'name': 'intel-fpga-dev.0',
'interface_type': 'pci',
'vendor_id': '0x8086',
'devices': '0000:5e:00.0',
'regions': [{
'function': 'vf',
'assignable': True,
'name': 'intel-fpga-dev.2',
'interface_type': 'pci',
'vendor_id': '0x8086',
'devices': '0000:5e:00.1',
'parent_devices': '0000:5e:00.0',
'path': '%s/intel-fpga-dev.2' % sysinfo.SYS_FPGA,
'product_id': '0xbcc1'}],
'parent_devices': '',
'path': '%s/intel-fpga-dev.0' % sysinfo.SYS_FPGA,
'product_id': '0xbcc0'},
'0000:5e:00.1': {
'function': 'vf',
'assignable': True,
'name': 'intel-fpga-dev.2',
'interface_type': 'pci',
'vendor_id': '0x8086',
'devices': '0000:5e:00.1',
'parent_devices': '0000:5e:00.0',
'path': '%s/intel-fpga-dev.2' % sysinfo.SYS_FPGA,
'product_id': '0xbcc1'},
'0000:be:00.0': {
'function': 'pf',
'assignable': True,
'pr_num': '0',
'name': 'intel-fpga-dev.1',
'interface_type': 'pci',
'vendor_id': '0x8086',
'devices': '0000:be:00.0',
'parent_devices': '',
'path': '%s/intel-fpga-dev.1' % sysinfo.SYS_FPGA,
'product_id': '0xbcc0'}}
def test_initialize_acc_drivers(self):
enabled_drivers = ['intel_fpga_driver']
self.rt._initialize_drivers(enabled_drivers=enabled_drivers)
drivers = self.rt.acc_drivers
self.assertEqual(len(drivers), len(enabled_drivers))
fpgas = self.rt._get_fpga_devices()
self.assertDictEqual(expect, fpgas)
def test_initialize_invalid_driver(self):
enabled_drivers = ['invalid_driver']
self.assertRaises(exception.InvalidDriver, self.rt._initialize_drivers,
enabled_drivers)
......@@ -45,6 +45,10 @@ wsgi_scripts =
cyborg.database.migration_backend =
sqlalchemy = cyborg.db.sqlalchemy.migration
cyborg.accelerator.driver =
intel_fpga_driver = cyborg.accelerator.drivers.fpga.intel.driver:IntelFPGADriver
nvmf_spdk_driver = cyborg.accelerator.drivers.spdk.nvmf.nvmf:NVMFDRIVER
oslo.config.opts =
cyborg = cyborg.conf.opts:list_opts
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment