.NET
Performance issue: Multi-thread batch insert
▌Background
A program written for QC in one of my projects saw its data volume jump from under 20,000 records/day to about 2,000,000 records/day, so the database inserts (running on my notebook) became unbearably slow. Using the two approaches below, the time to insert one day of data was cut from about 200 minutes to roughly 30 minutes:
- Use Visual Studio's built-in "Performance and Diagnostics" tools to find the program's bottlenecks
- Split the data into multiple chunks and send them to the DB as batch inserts on multiple threads
▌Environment
- Windows 7 Pro
- Intel i5 2.4 GHz, 2 cores / 4 logical threads
- 16 GB RAM
▌Performance tuning
▋Performance and Diagnostics tools
- The profiler showed that inserting into the DB was also a major time sink. The original code used one connection and committed 9,000~10,000 rows at a time. I changed it to commit every 2,000 rows and to send multiple batch insert commands to the database asynchronously (see the sketch below).
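The runTask(...) method shown later hands each batch to an asyncImport(...) method whose body is not listed in the post. Here is a minimal sketch of what such a batch commit could look like, assuming SQL Server via System.Data.SqlClient, a hypothetical target table dbo.CsvCCN with hypothetical columns (Id, Value, CreateTime), and a hypothetical this._connString field; the original implementation may differ.

private void asyncImport(List<CsvCCN> ccns)
{
    using (var conn = new System.Data.SqlClient.SqlConnection(this._connString))
    {
        conn.Open();
        using (var tran = conn.BeginTransaction())
        using (var cmd = conn.CreateCommand())
        {
            cmd.Transaction = tran;
            //One parameterized INSERT per row; the whole batch is committed in a single transaction
            cmd.CommandText =
                "INSERT INTO dbo.CsvCCN (Id, Value, CreateTime) VALUES (@id, @value, @createTime)";
            cmd.Parameters.Add("@id", System.Data.SqlDbType.NVarChar);
            cmd.Parameters.Add("@value", System.Data.SqlDbType.NVarChar);
            cmd.Parameters.Add("@createTime", System.Data.SqlDbType.DateTime);
            foreach (var ccn in ccns)
            {
                cmd.Parameters["@id"].Value = ccn.Id;
                cmd.Parameters["@value"].Value = ccn.Value;
                cmd.Parameters["@createTime"].Value = ccn.CreateTime;
                cmd.ExecuteNonQuery();
            }
            tran.Commit(); //Commit once per batch instead of once per 9,000~10,000 rows
        }
    }
}

If the target really is SQL Server, SqlBulkCopy would usually be faster still; the sketch above just mirrors the post's commit-per-batch idea.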
▋Program tuning (after)
- Main program
//Delay (milliseconds) between handling each file, to avoid opening too many threads at once and running out of memory
private static int FILE_HANDLE_DELAY_TIME = 1000;
//Maximum private memory allowed (MB); when exceeded, the job waits for memory to drop before continuing
private static int MAX_MEMORY_ALLOWED_MB = 800;
//Batch insert size; rows per file / this value = number of threads spawned per file
private static int BATCH_INSERT_SIZE = 1000;

foreach (var filePath in System.IO.Directory.GetFileSystemEntries(this._tarDirPath))
{
    //Import data asynchronously
    this.doImport(filePath);
    fileIndex++;
    //Monitor memory and delay the import task if needed
    this.taskDelayWithMemoryLimit();
}
- Send a batch insert every BATCH_INSERT_SIZE rows
private void doImport(String filePath)
{
    StreamReader sr = new StreamReader(filePath);
    var ccnsBatch = new List<CsvCCN>();
    int batchSize = BATCH_INSERT_SIZE; //Commit every BATCH_INSERT_SIZE rows
    int batchIndex = 0; //Batch size counter
    try
    {
        while (!sr.EndOfStream)
        {
            String str = sr.ReadLine();
            //Parse data to DAO from file stream
            CsvCCN ccn = null;
            this.parseToDao(filePath, str, out ccn);
            ccnsBatch.Add(ccn);
            batchIndex++;
            //Run async import task when the batch size reaches BATCH_INSERT_SIZE
            if (batchIndex.Equals(batchSize))
            {
                this.runTask(ccnsBatch);
                ccnsBatch.Clear();
                batchIndex = 0; //Reset the counter
            }
            else
            {
                continue;
            }
        }
        //Run async import task with the remaining data
        if (ccnsBatch.Count > 0)
        {
            this.runTask(ccnsBatch);
        }
        ccnsBatch.Clear();
    }
    catch (Exception)
    {
        throw;
    }
    finally
    {
        //GC
        ccnsBatch = null;
        sr.Close();
        sr.Dispose();
        //System.GC.Collect();
    }
}
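CsvCCN and parseToDao(...) are referenced above but not listed in the post. A minimal sketch, assuming CsvCCN is a plain serializable DAO and each CSV line is a simple comma-separated record; the field names and layout (Id, Value, CreateTime) are assumptions for illustration only.

[Serializable] //Required if the deep-copy helper clones by serialization (see the sketch after runTask)
public class CsvCCN
{
    public string Id { get; set; }
    public string Value { get; set; }
    public DateTime CreateTime { get; set; }
}

private void parseToDao(String filePath, String line, out CsvCCN ccn)
{
    //filePath is kept only to match the call site; the real parser may use it (e.g. as a source tag)
    var fields = line.Split(',');
    ccn = new CsvCCN
    {
        Id = fields[0],
        Value = fields[1],
        CreateTime = DateTime.Parse(fields[2])
    };
}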
- When memory usage exceeds the limit, pause immediately and stop spawning new batch insert threads. Note that GC.GetTotalMemory(true) waits for a garbage collection before returning, so the loop below both measures the managed heap and gives it a chance to drop back under the limit.
private void taskDelayWithMemoryLimit()
{
    //Get the memory usage of the process
    Process proc = Process.GetCurrentProcess();
    //Task delay
    double privateMemory = 0;
    double gcTotalMemory = 0;
    do
    {
        Task.Delay(FILE_HANDLE_DELAY_TIME).Wait();
        privateMemory = (double)proc.PrivateMemorySize64 / 1024 / 1024;
        gcTotalMemory = (double)GC.GetTotalMemory(true) / 1024 / 1024;
        Debug.WriteLine(String.Format(
            "{0} Process private memory usage : {1}MB , Garbage collector : {2}MB",
            DateTime.Now.ToString(), privateMemory, gcTotalMemory));
    } while (gcTotalMemory > MAX_MEMORY_ALLOWED_MB);
}
- Spawn a new thread (Task) for each batch insert
※ Note: deep clone the incoming list first so the task is thread-safe (a sketch of the deep-copy helper follows the code).
private void runTask(List<CsvCCN> ccnsBatch)
{
    List<CsvCCN> ccnsBatchCopy = null;
    using (var dp = new Domain.DeepCopy.DeepCopyObjList<CsvCCN>())
    {
        ccnsBatchCopy = dp.Clone(ccnsBatch);
    }
    Task doTask = new Task(() => asyncImport(ccnsBatchCopy));
    doTask.ContinueWith(x =>
    {
        Console.WriteLine(String.Format("Import Thread {0} finished.", doTask.Id));
        ccnsBatchCopy.Clear();
        ccnsBatchCopy = null;
    });
    Console.WriteLine(String.Format("Import Thread {0} started ...", doTask.Id));
    doTask.Start();
}
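Domain.DeepCopy.DeepCopyObjList<T> is the author's own helper and its source is not listed. A minimal sketch of one possible implementation, assuming the items are marked [Serializable] and using BinaryFormatter (common in .NET Framework at the time); IDisposable is implemented only so the using block above compiles.

using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.Serialization.Formatters.Binary;

namespace Domain.DeepCopy
{
    public class DeepCopyObjList<T> : IDisposable
    {
        //Serialize the list to a MemoryStream and deserialize it back to get a fully detached copy
        public List<T> Clone(List<T> source)
        {
            var formatter = new BinaryFormatter();
            using (var ms = new MemoryStream())
            {
                formatter.Serialize(ms, source);
                ms.Position = 0;
                return (List<T>)formatter.Deserialize(ms);
            }
        }

        public void Dispose()
        {
            //Nothing unmanaged to release in this sketch
        }
    }
}

The exact cloning technique does not matter; the point is that the background task must not share the list instance that doImport keeps clearing and refilling.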
▋Execution log
- Memory usage monitoring
2014/11/27 PM 07:28:35 Process private memory usage : 20.6875MB , Garbage collector : 0.804416656494141MB
2014/11/27 PM 07:28:36 Process private memory usage : 20.6875MB , Garbage collector : 0.804409027099609MB
2014/11/27 PM 07:28:36 Process private memory usage : 20.6875MB , Garbage collector : 0.804416656494141MB
2014/11/27 PM 07:28:37 Process private memory usage : 20.6875MB , Garbage collector : 0.804416656494141MB
- Running threads