AI時代的網站與手機App建置與開發Part21 - 使用ML.NET偵測信用卡異常交易
· 摘要
信用卡異常交易偵測是機器學習很實際的應用, 信用卡發卡銀行成功偵測信用卡異常交易, 並且拒絕核准, 不但能夠防止發卡銀行的損失, 而且可以提升信用卡持卡人對發卡銀行的信心. 反之, 如果信用卡發卡銀行未能成功偵測信用卡異常交易, 而逕予核准, 不但可能必須負擔被盜刷的金額, 而且可能會損及信用卡持卡人的信心, 甚至停止和信用卡發卡銀行往來(例如停卡).
圖: 能夠有效降低信用卡發卡銀行有形損失和無形損失的信用卡異常交易偵測
在這篇文章中, 我們將要為大家介紹如何使用ML.NET執行信用卡異常交易偵測.
· 準備訓練資料
首先請啟動瀏覽器, 瀏覽至Credit Card Fraud Detection下載信用卡交易資料(下載網址: https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud).
圖1所示即為下載得到的資料:
圖1: 信用卡交易資料節錄
這份信用卡交易資料的欄位詳細說明如下表:
|
欄位名稱 |
欄位說明 |
|
Time |
信用卡交易時間 (此筆交易與資料集中第一筆交易相隔的秒數) |
|
V1~V28 |
信用卡交易的相關資訊. 因為信用卡交易相關資訊為需要保密的個資, 所以欄位名稱和欄位內容都已經過處理 |
|
Amount |
信用卡交易金額 |
|
Class |
信用卡交易分類, 0表示正常交易, 1表示異常交易 |
· 使用ML.NET執行信用卡異常交易偵測
首先請下載上述的信用卡交易資料並加入到使用Visual Studio建立的專案, 並於[屬性]視窗將加入到專案, 內含信用卡交易資料的檔案的[複製到輸出目錄]屬性的內容值設定為:有更新時才複製.
· 定義描述信用卡交易資料的ModelInput類別
/// <summary>
/// One row of the credit-card transaction CSV file.
/// Each property is bound to a zero-based column index via <c>LoadColumn</c>
/// and exposed to the pipeline under the name given in <c>ColumnName</c>.
/// </summary>
public class ModelInput
{
    /// <summary>CSV column 0 ("Time").</summary>
    [LoadColumn(0), ColumnName("Time")]
    public float Time { get; set; }

    // CSV columns 1-28 ("V1".."V28"): the anonymized transaction features.
    [LoadColumn(1), ColumnName("V1")]
    public float V1 { get; set; }

    [LoadColumn(2), ColumnName("V2")]
    public float V2 { get; set; }

    [LoadColumn(3), ColumnName("V3")]
    public float V3 { get; set; }

    [LoadColumn(4), ColumnName("V4")]
    public float V4 { get; set; }

    [LoadColumn(5), ColumnName("V5")]
    public float V5 { get; set; }

    [LoadColumn(6), ColumnName("V6")]
    public float V6 { get; set; }

    [LoadColumn(7), ColumnName("V7")]
    public float V7 { get; set; }

    [LoadColumn(8), ColumnName("V8")]
    public float V8 { get; set; }

    [LoadColumn(9), ColumnName("V9")]
    public float V9 { get; set; }

    [LoadColumn(10), ColumnName("V10")]
    public float V10 { get; set; }

    [LoadColumn(11), ColumnName("V11")]
    public float V11 { get; set; }

    [LoadColumn(12), ColumnName("V12")]
    public float V12 { get; set; }

    [LoadColumn(13), ColumnName("V13")]
    public float V13 { get; set; }

    [LoadColumn(14), ColumnName("V14")]
    public float V14 { get; set; }

    [LoadColumn(15), ColumnName("V15")]
    public float V15 { get; set; }

    [LoadColumn(16), ColumnName("V16")]
    public float V16 { get; set; }

    [LoadColumn(17), ColumnName("V17")]
    public float V17 { get; set; }

    [LoadColumn(18), ColumnName("V18")]
    public float V18 { get; set; }

    [LoadColumn(19), ColumnName("V19")]
    public float V19 { get; set; }

    [LoadColumn(20), ColumnName("V20")]
    public float V20 { get; set; }

    [LoadColumn(21), ColumnName("V21")]
    public float V21 { get; set; }

    [LoadColumn(22), ColumnName("V22")]
    public float V22 { get; set; }

    [LoadColumn(23), ColumnName("V23")]
    public float V23 { get; set; }

    [LoadColumn(24), ColumnName("V24")]
    public float V24 { get; set; }

    [LoadColumn(25), ColumnName("V25")]
    public float V25 { get; set; }

    [LoadColumn(26), ColumnName("V26")]
    public float V26 { get; set; }

    [LoadColumn(27), ColumnName("V27")]
    public float V27 { get; set; }

    [LoadColumn(28), ColumnName("V28")]
    public float V28 { get; set; }

    /// <summary>CSV column 29 ("Amount"): the transaction amount.</summary>
    [LoadColumn(29), ColumnName("Amount")]
    public float Amount { get; set; }

    /// <summary>
    /// CSV column 30 ("Class"): the label. The file stores 0 for a normal
    /// transaction and 1 for an anomalous one.
    /// </summary>
    [LoadColumn(30), ColumnName("Class")]
    public bool Class { get; set; }
}
· 定義描述信用卡交易資料異常偵測結果的ModelOutput類別
/// <summary>
/// Result of scoring a single transaction with the trained model.
/// </summary>
public class ModelOutput
{
    /// <summary>Predicted label, read from the "PredictedLabel" output column.</summary>
    [ColumnName("PredictedLabel")]
    public bool Prediction { get; set; }

    /// <summary>Score reported by the model alongside the predicted label.</summary>
    public float Score { get; set; }
}
· 實作信用卡交易異常偵測
// Relative path of the CSV file that is passed to LoadFromTextFile as the training data.
private string TRAIN_DATA_FILEPATH = "creditcard.csv";
// ML.NET context shared by every step below; created with a fixed seed (1),
// presumably so that repeated runs give the same results.
private MLContext mlContext = new MLContext(seed: 1);
/// <summary>
/// Click handler that runs the whole demo: loads the CSV, builds and trains the
/// pipeline, cross-validates it, then scores the first training row and traces
/// the actual versus predicted class.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnDetectFraud_Click(object sender, RoutedEventArgs e)
{
    // Load the training data.
    IDataView trainingDataView = mlContext.Data.LoadFromTextFile<ModelInput>(
        path: TRAIN_DATA_FILEPATH,
        hasHeader: true,
        separatorChar: ',',
        allowQuoting: true,
        allowSparse: false);

    // Build the training pipeline.
    IEstimator<ITransformer> trainingPipeline = BuildTrainingPipeline(mlContext);

    // Train the model.
    ITransformer mlModel = TrainModel(mlContext, trainingDataView, trainingPipeline);

    // Run cross-validation and trace its metrics.
    Evaluate(mlContext, trainingDataView, trainingPipeline);

    // Use the first training row as the test sample.
    ModelInput sampleData = mlContext.Data
        .CreateEnumerable<ModelInput>(trainingDataView, false)
        .First();

    // The prediction engine is disposable; release it as soon as the single
    // prediction has been made instead of leaving it to the finalizer.
    using (var predEngine =
        mlContext.Model.CreatePredictionEngine<ModelInput, ModelOutput>(mlModel))
    {
        // Score the sample.
        ModelOutput predictionResult = predEngine.Predict(sampleData);

        // Show the result.
        Trace.WriteLine($"\n\nActual Class: {sampleData.Class} \nPredicted Class: {predictionResult.Prediction}\n\n");
    }
}
/// <summary>
/// Builds the training pipeline: concatenates "Time", "V1".."V28" and "Amount"
/// into a single "Features" column and appends a LightGBM binary classifier
/// that uses "Class" as the label.
/// </summary>
/// <param name="mlContext">ML.NET context used to create the transforms and the trainer.</param>
/// <returns>The estimator chain, not yet fitted.</returns>
IEstimator<ITransformer> BuildTrainingPipeline(MLContext mlContext)
{
    // Input columns to analyse, in the same order as they appear in the file:
    // Time, V1..V28, Amount.
    string[] featureColumns = new[] { "Time" }
        .Concat(Enumerable.Range(1, 28).Select(i => "V" + i))
        .Concat(new[] { "Amount" })
        .ToArray();

    var featureAssembler = mlContext.Transforms.Concatenate("Features", featureColumns);

    // LightGBM (Light Gradient Boosting Machine), developed by Microsoft;
    // chosen for its execution speed and memory usage.
    var boosterOptions = new GradientBooster.Options()
    {
        L2Regularization = 1,
        L1Regularization = 0
    };

    var trainerOptions = new LightGbmBinaryTrainer.Options()
    {
        NumberOfIterations = 150,
        LearningRate = 0.2001066f,
        NumberOfLeaves = 7,
        MinimumExampleCountPerLeaf = 10,
        UseCategoricalSplit = true,
        HandleMissingValue = false,
        MinimumExampleCountPerGroup = 100,
        MaximumCategoricalSplitPointCount = 16,
        CategoricalSmoothing = 10,
        L2CategoricalRegularization = 5,
        Booster = boosterOptions,
        LabelColumnName = "Class",
        FeatureColumnName = "Features"
    };

    var classifier = mlContext.BinaryClassification.Trainers.LightGbm(trainerOptions);

    // Chain the feature assembly and the trainer.
    return featureAssembler.Append(classifier);
}
/// <summary>
/// Runs 5-fold cross-validation of the pipeline on the given data, using
/// "Class" as the label column, and traces the averaged metrics.
/// </summary>
/// <param name="mlContext">ML.NET context that performs the cross-validation.</param>
/// <param name="trainingDataView">Data to split into folds.</param>
/// <param name="trainingPipeline">Unfitted pipeline that is trained on each fold.</param>
void Evaluate(MLContext mlContext, IDataView trainingDataView,
    IEstimator<ITransformer> trainingPipeline)
{
    Trace.WriteLine("=== Cross-validating to get model's accuracy metrics ==");

    // Run the cross-validation.
    var binaryCatalog = mlContext.BinaryClassification;
    var foldResults = binaryCatalog.CrossValidateNonCalibrated(
        trainingDataView,
        trainingPipeline,
        numberOfFolds: 5,
        labelColumnName: "Class");

    // Show the cross-validation result.
    PrintBinaryClassificationFoldsAverageMetrics(foldResults);
}
/// <summary>
/// Fits the pipeline on the training data, tracing a banner before and after.
/// </summary>
/// <param name="mlContext">ML.NET context (not used by this method).</param>
/// <param name="trainingDataView">Data the pipeline is fitted on.</param>
/// <param name="trainingPipeline">Unfitted pipeline.</param>
/// <returns>The fitted model.</returns>
ITransformer TrainModel(MLContext mlContext, IDataView trainingDataView,
    IEstimator<ITransformer> trainingPipeline)
{
    const string banner = "===============";

    Trace.WriteLine(banner + " Training model " + banner);

    // Train the model on the training data.
    ITransformer fittedModel = trainingPipeline.Fit(trainingDataView);

    Trace.WriteLine(banner + " End of training process " + banner);
    return fittedModel;
}
/// <summary>
/// Traces the accuracy averaged over all cross-validation folds, together with
/// its standard deviation and 95% confidence interval.
/// </summary>
/// <param name="crossValResults">Per-fold cross-validation results.</param>
void PrintBinaryClassificationFoldsAverageMetrics(
    IEnumerable<TrainCatalogBase.CrossValidationResult<BinaryClassificationMetrics>> crossValResults)
{
    // Accuracy of every fold. Materialized once because three separate
    // calculations below consume it; a lazy query would be re-run each time.
    double[] accuracyValues = crossValResults
        .Select(r => r.Metrics.Accuracy)
        .ToArray();

    double accuracyAverage = accuracyValues.Average();
    double accuraciesStdDeviation = CalculateStandardDeviation(accuracyValues);
    double accuraciesConfidenceInterval95 = CalculateConfidenceInterval95(accuracyValues);

    Trace.WriteLine("*****************************************************");
    Trace.WriteLine("* Metrics for Binary Classification model ");
    Trace.WriteLine("*----------------------------------------------------");
    // "0.###" rather than "#.###": the "#" placeholder prints nothing for a value
    // that rounds to zero and drops the leading zero otherwise, so small
    // deviations were shown as "()" or "(.001)".
    Trace.WriteLine($"* Average Accuracy: {accuracyAverage:0.###} - Standard deviation: ({accuraciesStdDeviation:0.###}) - Confidence Interval 95%: ({accuraciesConfidenceInterval95:0.###})");
    Trace.WriteLine("*****************************************************");
}
/// <summary>
/// Computes the sample standard deviation (n - 1 denominator) of a sequence.
/// </summary>
/// <param name="values">Values to measure; enumerated exactly once.</param>
/// <returns>
/// The sample standard deviation, or 0 when the sequence holds a single value
/// (no spread can be measured; the plain formula would give 0/0 = NaN).
/// </returns>
/// <exception cref="InvalidOperationException">The sequence is empty.</exception>
double CalculateStandardDeviation(IEnumerable<double> values)
{
    // Snapshot the input so a lazy or one-shot sequence is only walked once.
    double[] samples = values.ToArray();

    // Average() throws InvalidOperationException for an empty sequence.
    double average = samples.Average();

    if (samples.Length < 2)
    {
        return 0.0;
    }

    double sumOfSquaresOfDifferences =
        samples.Sum(val => (val - average) * (val - average));

    return Math.Sqrt(sumOfSquaresOfDifferences / (samples.Length - 1));
}
/// <summary>
/// Computes the half-width of the 95% confidence interval of the mean,
/// using the normal approximation: 1.96 * s / sqrt(n).
/// </summary>
/// <param name="values">Values whose mean is being estimated; enumerated exactly once.</param>
/// <returns>The half-width of the 95% confidence interval.</returns>
double CalculateConfidenceInterval95(IEnumerable<double> values)
{
    // Snapshot the input so it is not re-enumerated for the count.
    double[] samples = values.ToArray();

    // Standard error of the mean is s / sqrt(n). The standard deviation already
    // uses the n - 1 (sample) denominator, so dividing by sqrt(n - 1) here would
    // apply the correction twice and overstate the interval.
    double standardError =
        CalculateStandardDeviation(samples) / Math.Sqrt(samples.Length);

    return 1.96 * standardError;
}
執行上述的程式碼首先顯示Cross Validation的結果, 如圖2所示:
圖2: Cross Validation的結果
然後再顯示使用第一筆訓練資料當做測試資料測試訓練妥的機器學習模型的結果, 如圖3所示:
圖3: 使用第一筆訓練資料當做測試資料測試訓練妥的機器學習模型的結果
請注意圖3所示的測試資料為正常的信用卡交易(Class欄位內容值為False), 而機器學習模型預測的結果也是正常的信用卡交易(Class欄位內容值為False).
範例下載:


留言
張貼留言