RDC REST API (Sample code)
Please follow the README file. Squashed commits: Update README_rdc_rest_api.txt; Update RDC_REST_API.py; Error handling updates; Updates for error handling; Updates; Updates for rdc_field_watch/rdc_field_unwatch and delete query; Updates for rdc_field_watch/rdc_field_unwatch and delete query; SWDEV-479738 [RDC] - Rest API; Delete python_binding/RDC_REST_API.py; new rdc_rest_api.py file for SWDEV-479738 [RDC] - Rest API
This commit is contained in:
committed by
Galantsev, Dmitrii
vanhempi
e847f74f78
commit
cf566ebd31
@@ -0,0 +1,132 @@
|
||||
from flask import Flask, request, jsonify
|
||||
from RdcReader import RdcReader
|
||||
from RdcUtil import RdcUtil
|
||||
from rdc_bootstrap import *
|
||||
|
||||
# Flask application object; the route decorators in this module register on it
app = Flask(__name__)

# Shared RDC reader (constructed with ip_port=None) and the RDC helper used
# by the endpoints to enumerate GPUs and create groups
rdc_reader = RdcReader(ip_port=None)
rdc_util = RdcUtil()

# In-memory registry mapping each query_id to its stored query criteria
gpu_queries = dict()
|
||||
|
||||
# GET /rdc/discovery -- list the GPUs visible through the RDC handle
@app.route('/rdc/discovery', methods=['GET'])
def discover_gpus():
    """Return a JSON object mapping every GPU index to its device name.

    Any exception raised while querying RDC is reported as a JSON
    ``{"error": ...}`` body with HTTP status 500.
    """
    try:
        handle = rdc_reader.rdc_handle
        names_by_index = {}
        for index in rdc_util.get_all_gpu_indexes(handle):
            # The attributes struct is filled in place by the RDC call
            attributes = rdc_device_attributes_t()
            rdc.rdc_device_get_attributes(handle, index, attributes)
            # device_name is bytes; decode it so it can be serialized as JSON
            names_by_index[index] = attributes.device_name.decode('utf-8')
        return jsonify(names_by_index)
    except Exception as err:
        return jsonify({"error": str(err)}), 500
|
||||
|
||||
# POST /rdc/query_criteria -- register a new query and start watching its fields
@app.route('/rdc/query_criteria', methods=['POST'])
def create_query_criteria():
    """Create a query criteria from the GPU indexes and metrics in the JSON body.

    Expected payload: a JSON object with a required ``"metrics"`` list of
    field-name strings and an optional ``"gpu_index"`` list. When
    ``"gpu_index"`` is omitted, all GPUs reported by RDC are used.

    Returns:
        ``{"query_id": "G-<gpu_group>-F-<field_group>"}`` on success;
        HTTP 400 with ``{"error": ...}`` when the payload is missing, is not
        a JSON object, or has an invalid ``"metrics"`` entry;
        HTTP 500 with ``{"error": ...}`` when the field watch fails or any
        other exception is raised.
    """
    try:
        # silent=True yields None for a missing or malformed JSON body, so the
        # client receives the 400 below instead of a generic 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or "metrics" not in data:
            return jsonify({"error": "Invalid request payload"}), 400

        metrics = data["metrics"]
        if not isinstance(metrics, list) or not all(isinstance(m, str) for m in metrics):
            return jsonify({"error": "Invalid request payload"}), 400

        # Only enumerate every GPU when the caller did not pick specific ones
        if "gpu_index" in data:
            gpu_indexes = data["gpu_index"]
        else:
            gpu_indexes = rdc_util.get_all_gpu_indexes(rdc_reader.rdc_handle)

        # Create the RDC GPU group and field group backing this query
        field_ids = [rdc.get_field_id_from_name(m.encode('utf-8')).value for m in metrics]
        gpu_group_id, _ = rdc_util.create_gpu_group(rdc_reader.rdc_handle, b"query_gpu_group", gpu_indexes)
        field_group_id, _ = rdc_util.create_field_group(rdc_reader.rdc_handle, b"query_field_group", field_ids)

        # Start watching so RDC begins fetching the metrics into its cache
        result = rdc.rdc_field_watch(rdc_reader.rdc_handle, gpu_group_id, field_group_id, 1000000, 3600.0, 1000)
        if rdc_status_t(result) != rdc_status_t.RDC_ST_OK:
            return jsonify({"error": "Failed to watch fields"}), 500

        # The query ID embeds both group IDs
        query_id = f"G-{gpu_group_id.value}-F-{field_group_id.value}"
        gpu_queries[query_id] = {"gpu_index": gpu_indexes, "metrics": metrics, "query_id": query_id}
        return jsonify({"query_id": query_id})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
||||
|
||||
# GET /rdc/query_criteria -- list stored query criteria (optionally one by ID)
@app.route('/rdc/query_criteria', methods=['GET'])
def get_all_query_criteria():
    """Return the stored query criteria as JSON.

    With a non-empty ``query_id`` query-string argument, the matching entry
    is returned (an empty object when the ID is unknown). Otherwise a list
    of every stored criteria is returned. Exceptions yield HTTP 500 with a
    JSON ``{"error": ...}`` body.
    """
    try:
        requested_id = request.args.get("query_id")
        if not requested_id:
            return jsonify(list(gpu_queries.values()))
        return jsonify(gpu_queries.get(requested_id, {}))
    except Exception as err:
        return jsonify({"error": str(err)}), 500
|
||||
|
||||
# GET /rdc/query_criteria/<query_id> -- fetch one stored query criteria
@app.route('/rdc/query_criteria/<query_id>', methods=['GET'])
def get_query_criteria(query_id):
    """Return the criteria stored under ``query_id`` as JSON.

    Responds with HTTP 404 and ``{"error": "Query ID not found"}`` when the
    ID is unknown, and HTTP 500 with ``{"error": ...}`` on any exception.
    """
    try:
        if query_id not in gpu_queries:
            return jsonify({"error": "Query ID not found"}), 404
        return jsonify(gpu_queries[query_id])
    except Exception as err:
        return jsonify({"error": str(err)}), 500
|
||||
|
||||
def _group_ids_from_query_id(query_id):
    """Extract ``(gpu_group_id, field_group_id)`` from a ``G-<n>-F-<m>`` query ID.

    This is the ID format generated by ``create_query_criteria``.

    Raises:
        ValueError: if ``query_id`` does not follow the ``G-<n>-F-<m>`` shape
            or the numeric parts are not integers.
    """
    parts = query_id.split("-")
    if len(parts) != 4 or parts[0] != "G" or parts[2] != "F":
        raise ValueError(f"Malformed query ID: {query_id}")
    return int(parts[1]), int(parts[3])


# DELETE /rdc/query_criteria/<query_id> -- stop watching and remove a query
@app.route('/rdc/query_criteria/<query_id>', methods=['DELETE'])
def delete_query_criteria(query_id):
    """Unwatch, destroy the RDC groups of, and forget the query ``query_id``.

    Returns:
        ``{"message": "Deleted successfully"}`` on success;
        HTTP 404 with ``{"error": "Query ID not found"}`` for an unknown ID;
        HTTP 500 with ``{"error": ...}`` when the unwatch call fails or any
        exception is raised.
    """
    try:
        if query_id not in gpu_queries:
            return jsonify({"error": "Query ID not found"}), 404

        # Use the groups that belong to THIS query (encoded in its ID) rather
        # than the reader's own groups. The previous code also assigned the
        # field group ID to gpu_group_id.
        # NOTE(review): the IDs are passed as plain ints; confirm the RDC
        # binding accepts them for its group-ID parameters.
        gpu_group_id, field_group_id = _group_ids_from_query_id(query_id)

        # Stop fetching metrics for this query
        result = rdc.rdc_field_unwatch(rdc_reader.rdc_handle, gpu_group_id, field_group_id)
        if rdc_status_t(result) != rdc_status_t.RDC_ST_OK:
            return jsonify({"error": "Failed to unwatch fields"}), 500

        # Destroy the GPU group and field group backing the query
        rdc.rdc_group_gpu_destroy(rdc_reader.rdc_handle, gpu_group_id)
        rdc.rdc_group_field_destroy(rdc_reader.rdc_handle, field_group_id)

        # Remove the query from the in-memory registry
        del gpu_queries[query_id]
        return jsonify({"message": "Deleted successfully"})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
||||
|
||||
# GET /rdc/gpu_metrics/<query_id> -- latest metric values for a stored query
@app.route('/rdc/gpu_metrics/<query_id>', methods=['GET'])
def get_gpu_metrics(query_id):
    """Return the latest value of each queried metric for each queried GPU.

    The response is a JSON list with one object per GPU, holding
    ``"gpu_index"`` plus one entry per metric whose latest value could be
    read; metrics whose read does not return ``RDC_ST_OK`` are omitted.
    Unknown IDs yield HTTP 404 and any exception yields HTTP 500, both with
    a JSON ``{"error": ...}`` body.
    """
    try:
        if query_id not in gpu_queries:
            return jsonify({"error": "Query ID not found"}), 404

        criteria = gpu_queries[query_id]
        results = []
        for gpu_index in criteria["gpu_index"]:
            sample = {"gpu_index": gpu_index}
            for metric_name in criteria["metrics"]:
                field_id = rdc.get_field_id_from_name(metric_name.encode('utf-8')).value
                # The value struct is filled in place by the RDC call
                latest = rdc_field_value()
                status = rdc.rdc_field_get_latest_value(rdc_reader.rdc_handle, gpu_index, field_id, latest)
                if rdc_status_t(status) != rdc_status_t.RDC_ST_OK:
                    continue
                # NOTE(review): always reads the l_int member; confirm this is
                # right for fields that are not integer-valued.
                sample[metric_name] = latest.value.l_int
            results.append(sample)
        return jsonify(results)
    except Exception as err:
        return jsonify({"error": str(err)}), 500
|
||||
|
||||
# Script entry point: start the Flask server when this file is run directly
if __name__ == '__main__':
    # Binding to 0.0.0.0 makes the API reachable on all network interfaces
    app.run(port=50052, host='0.0.0.0')
|
||||
Viittaa uudesa ongelmassa
Block a user