This time we’re going bigger than ever. Fabric, Power BI, SQL, AI and more. We're covering it all. You won't want to miss it.
Learn more
Did you hear? There's a new SQL AI Developer certification (DP-800). Start preparing now and be one of the first to get certified. Register now
Anomaly Detector, one of the Azure AI services, enables you to monitor and detect anomalies in your time series data. The service is based on advanced algorithms (SR-CNN for univariate analysis and MTAD-GAT for multivariate analysis) and is being retired by October 2026. In this blog post we lay out a strategy for migrating to Microsoft Fabric that lets you detect identical anomalies, using the same algorithms as the old service, and even more.
Here are a few of the benefits of the strategy that we are about to lay out for you:
There are a few options for time series anomaly detection in Fabric RTI (Real-Time Intelligence):
In the following example we shall
Note that for the univariate model there is no need to train the model in a separate step (as the training is fast and done internally) and we can just predict.
Below we briefly present the steps; for the detailed tutorial, see Multivariate anomaly detection - Microsoft Fabric | Microsoft Learn.
# Read the Delta table behind the OneLake URI and convert it to a pandas
# DataFrame whose index is the 'Date' column.
onelake_uri = "OneLakeTableURI" # Replace with your OneLake table URI
abfss_uri = convert_onelake_to_abfss(onelake_uri)
spark_df = spark.read.format('delta').load(abfss_uri)
df = spark_df.toPandas().set_index('Date')
import plotly.graph_objects as go

# One line trace per ticker column, all sharing df's index as the x axis.
fig = go.Figure()
for ticker in ('AAPL', 'AMZN', 'GOOG', 'MSFT', 'SPY'):
    fig.add_trace(go.Scatter(x=df.index, y=df[ticker], mode='lines', name=ticker))

# Chart title, axis labels and legend heading.
fig.update_layout(
    title='Stock Prices change',
    xaxis_title='Date',
    yaxis_title='Change %',
    legend_title='Tickers',
)
fig.show()
import pandas as pd

# Columns that are fed to the multivariate model.
features_cols = ['AAPL', 'AMZN', 'GOOG', 'MSFT', 'SPY']

# Rows strictly before this timestamp form the training set.
cutoff_date = pd.to_datetime('2023-01-01')

# 'Date' was moved into the index when df was loaded (set_index('Date')), so
# it is no longer a column: df.Date would raise AttributeError. Filter on the
# index instead.
# NOTE(review): if the index is timezone-aware, cutoff_date must be localized
# to the same timezone before comparing - confirm against the table schema.
train_df = df[df.index < cutoff_date]
import mlflow
from anomaly_detector import MultivariateAnomalyDetector

model = MultivariateAnomalyDetector()

# Training parameters; kept in a dict so the exact same values can be passed
# to fit() and reused wherever the parameters need to be recorded.
sliding_window = 200
params = {"sliding_window": sliding_window}

# Fit the detector on the training slice.
model.fit(train_df, params=params)
# Record the training parameters in an MLflow run and register the fitted
# model under a fixed name so it can be looked up later.
with mlflow.start_run():
    mlflow.log_params(params)
    mlflow.set_tag("Training Info", "MVAD on 5 Stocks Dataset")

    model_info = mlflow.pyfunc.log_model(
        python_model=model,
        artifact_path="mvad_artifacts",
        registered_model_name="mvad_5_stocks_model",
    )
# Look up the registered model by name and print the source path of the first
# entry in its latest_versions list.
matches = mlflow.search_registered_models(filter_string="name='mvad_5_stocks_model'")
mi = matches[0]
model_abfss = mi.latest_versions[0].source
print(model_abfss)
// Stored function that scores `samples` with an MLflow-packaged MVAD model.
//   samples       - input table; its last four columns are overwritten with the model output.
//   features_cols - dynamic array with the names of the feature columns passed to the model.
//   artifacts_uri - folder URI holding the MLflow model files listed in `artifacts` below.
//   trim_result   - when true, drop the leading rows for which the model emits no prediction.
.create-or-alter function with (folder = "Packages\\ML", docstring = "Predict MVAD model in Microsoft Fabric")
predict_fabric_mvad_fl(samples:(*), features_cols:dynamic, artifacts_uri:string, trim_result:bool=false)
{
    let s = artifacts_uri;
    // Map each local file name to its source URI; the Python code below expects every file at C:/Temp/<name>.
    let artifacts = bag_pack('MLmodel', strcat(s, '/MLmodel;impersonate'), 'conda.yaml', strcat(s, '/conda.yaml;impersonate'),
                             'requirements.txt', strcat(s, '/requirements.txt;impersonate'), 'python_env.yaml', strcat(s, '/python_env.yaml;impersonate'),
                             'python_model.pkl', strcat(s, '/python_model.pkl;impersonate'));
    let kwargs = bag_pack('features_cols', features_cols, 'trim_result', trim_result);
    // The leading `if 1:` lets the Python body be indented inside this KQL string; the indentation is significant.
    let code = ```if 1:
        import os
        import shutil
        import mlflow
        model_dir = 'C:/Temp/mvad_model'
        model_data_dir = model_dir + '/data'
        # Gather the downloaded artifacts into a single folder that mlflow can load as a model.
        os.mkdir(model_dir)
        shutil.move('C:/Temp/MLmodel', model_dir)
        shutil.move('C:/Temp/conda.yaml', model_dir)
        shutil.move('C:/Temp/requirements.txt', model_dir)
        shutil.move('C:/Temp/python_env.yaml', model_dir)
        shutil.move('C:/Temp/python_model.pkl', model_dir)
        # `kargs` (sic) is the name under which the python plugin exposes the kwargs property bag.
        features_cols = kargs["features_cols"]
        trim_result = kargs["trim_result"]
        test_data = df[features_cols]
        model = mlflow.pyfunc.load_model(model_dir)
        predictions = model.predict(test_data)
        predict_result = pd.DataFrame(predictions)
        samples_offset = len(df) - len(predict_result)        # this model doesn't output predictions for the first sliding_window-1 samples
        if trim_result:                                       # trim the prefix samples
            result = df[samples_offset:]
            result.iloc[:,-4:] = predict_result.iloc[:, 1:]   # no need to copy 1st column which is the timestamp index
        else:
            result = df                                       # output all samples
            result.iloc[samples_offset:,-4:] = predict_result.iloc[:, 1:]
        ```;
    samples
    | evaluate python(typeof(*), code, kwargs, external_artifacts=artifacts)
}
// Score every row on or after cutoff_date with predict_fabric_mvad_fl and render the result as an anomaly chart.
let cutoff_date=datetime(2023-01-01);
let num_predictions=toscalar(demo_stocks_change | where Date >= cutoff_date | count);   // number of latest points to predict
let sliding_window=200;                                                                 // should match the window that was set for model training
// Extra leading rows to fetch ahead of the rows being scored; evaluates to 199 (= sliding_window-1) for a window of 200.
let prefix_score_len = sliding_window/2+min_of(sliding_window/2, 200)-1;
let num_samples = prefix_score_len + num_predictions;
demo_stocks_change
| top num_samples by Date desc
| order by Date asc
// Placeholder output columns: the function writes its predictions into the last four columns of the table.
| extend is_anomaly=bool(false), score=real(null), severity=real(null), interpretation=dynamic(null)
| invoke predict_fabric_mvad_fl(pack_array('AAPL', 'AMZN', 'GOOG', 'MSFT', 'SPY'),
            // NOTE: Update artifacts_uri to model path
            artifacts_uri='enter your model URI here',
            trim_result=true)
// anomalychart expects one series per column, so collapse the rows into lists.
| summarize Date=make_list(Date), AAPL=make_list(AAPL), AMZN=make_list(AMZN), GOOG=make_list(GOOG), MSFT=make_list(MSFT), SPY=make_list(SPY), anomaly=make_list(toint(is_anomaly))
| render anomalychart with(anomalycolumns=anomaly, title='Stock Price Changes in % with Anomalies')
The addition of the time-series-anomaly-detector package to Fabric makes it the top platform for univariate & multivariate time series anomaly detection. Choose the anomaly detection method that best fits your scenario – from native KQL function for univariate analysis at scale, through standard multivariate analysis techniques and up to the best of breed time series anomaly detection algorithms implemented in the time-series-anomaly-detector package. For more information see the overview and tutorial.
You must be a registered user to add a comment. If you've already registered, sign in. Otherwise, register and sign in.