[Note] Import Data in SQL Server 2005 (匯入資料)

January 9th, 2009 Phanix

Remember to check the item “Integration Services” (SSIS) during installation; otherwise importing data from txt, Excel, etc. files into a SQL Server 2005 database will fail with an error message like “product level is insufficient for components”.

Related: product level is insufficient for components

還是改用Regular expression好了

December 19th, 2008 Phanix

既然都有誤斬忠良的時候… 那 extract html text content 還是用 regular expression 好了,然後再特殊處理一下 <script> 和 <style>…

// Remove every <script>...</script> and <style>...</style> block (tags and
// contents) from tb1.Text, then replace each remaining HTML tag with a space.
int i, j;

// Work on a local copy so the text box is assigned only once at the end
// instead of once per removed block.
string html = tb1.Text;

foreach (string tag in new string[] { "script", "style" })
{
    string openTag = "<" + tag;
    string closeTag = "</" + tag + ">";

    // Ordinal, case-insensitive search: no lower-cased copy of the whole text
    // per lookup, and the match does not depend on the current culture.
    i = html.IndexOf(openTag, System.StringComparison.OrdinalIgnoreCase);
    while (i >= 0)
    {
        j = html.IndexOf(closeTag, i, System.StringComparison.OrdinalIgnoreCase);
        if (j < 0)
        {
            // Unterminated block: there is no closing tag to cut at, so drop
            // everything from the opening tag to the end of the text.
            html = html.Substring(0, i);
            break;
        }

        html = html.Substring(0, i) + html.Substring(j + closeTag.Length);

        // Search again from the start, because joining the two pieces can
        // itself form a new opening tag.
        i = html.IndexOf(openTag, System.StringComparison.OrdinalIgnoreCase);
    }
}

// Whatever tags are left are replaced by a single space each.
tb1.Text = Regex.Replace(html, "<[^>]*>", " ");

[Memo] 整理一下最近寫程式用到的東西

December 4th, 2008 Phanix

免得以後要找很麻煩。都是 C# 的程式。

Threading 中作 output 到 textbox 中

/// <summary>Signature of <c>SetText</c>, used to marshal the call through <c>Invoke</c>.</summary>
delegate void SetTextCallback(TextBox tb, string text);

/// <summary>
/// Assigns <paramref name="text"/> to <paramref name="tb"/>. When the text box
/// reports <c>InvokeRequired</c>, the call is re-dispatched to this same method
/// through <c>this.Invoke</c>; otherwise the text is set directly.
/// </summary>
/// <param name="tb">The text box to update.</param>
/// <param name="text">The new content of the text box.</param>
private void SetText(TextBox tb, string text)
{
    if (!tb.InvokeRequired)
    {
        tb.Text = text;
        return;
    }

    // Re-enter this method via Invoke, passing the same two arguments.
    SetTextCallback callback = SetText;
    this.Invoke(callback, tb, text);
}

Extract text in <body> tag

strpage = ""; // Holds the HTML source of the page
strtext = ""; // Set to the first non-empty fragment written; empty until then

// Lower-case the page once; every case-insensitive tag lookup below reuses
// this copy instead of lower-casing the whole page again on each iteration.
string strlower = strpage.ToLower();

// Only text between <body> and </body> is extracted
ibodystart = strlower.IndexOf("<body");
ibodyend = strlower.IndexOf("</body>");

if (ibodystart < 0)
{
    return;
}
if (ibodyend < 0)
{
    // No closing tag: treat the rest of the page as the body
    ibodyend = strpage.Length;
}

// j is the index of the '>' that ends the current tag and k is the index of
// the next '<'; the text between them is the content between consecutive tags
j = strpage.IndexOf(">", ibodystart);

sw = new StreamWriter([FILENAME], false, Encoding.UTF8);
try
{
    #region filter out html tags, css and scripts, and then just keep plaintext
    while (j > 0 && j < ibodyend)
    {
        k = strpage.IndexOf("<", j);

        // Read the text between the two tags into strtmp; when there is no
        // further tag, take everything up to the end of the page
        if (k < 0)
        {
            strtmp = strpage.Substring(j + 1);
        }
        else
        {
            strtmp = strpage.Substring(j + 1, k - j - 1);
        }

        strtmp = HttpUtility.HtmlDecode(strtmp).Trim();

        // Write non-empty fragments: the first one as-is, later ones with a
        // leading space
        if (strtmp != "")
        {
            if (strtext == "")
            {
                sw.WriteLine(strtmp);
                strtext = strtmp;
            }
            else
            {
                sw.WriteLine(" " + strtmp);
            }
        }

        // Find the next j
        if (k < 0)
        {
            j = -1;
        }
        else if (strpage.Length - k <= 7)
        {
            // 7 or fewer characters remain from k: stop here, so the 7-character
            // prefix comparisons below never run past the end of the page
            j = -1;
        }
        else if (string.CompareOrdinal(strpage, k, "<!--", 0, 4) == 0)
        {
            // Comment: skip to the '>' of its closing "-->"
            j = strpage.IndexOf("-->", k);
            if (j >= 0)
            {
                j = strpage.IndexOf(">", j);
            }
        }
        else if (string.CompareOrdinal(strlower, k, "<script", 0, 7) == 0)
        {
            // Script block: skip to the '>' of its closing </script>
            j = strlower.IndexOf("</script>", k);
            if (j >= 0)
            {
                j = strpage.IndexOf(">", j);
            }
        }
        else if (string.CompareOrdinal(strlower, k, "<style", 0, 6) == 0)
        {
            // Style block: skip to the '>' of its closing </style>
            j = strlower.IndexOf("</style>", k);
            if (j >= 0)
            {
                j = strpage.IndexOf(">", j);
            }
        }
        else
        {
            // Any other tag: skip to its own closing '>'
            j = strpage.IndexOf(">", k);
        }
    }
    #endregion
}
finally
{
    // Close the output file even when the loop above throws
    sw.Close();
}

Execute another .exe with command-line parameters (without showing its window). This example uses WordNet.

// Runs wn.exe once for w1 and once for w2 with the " -hypen" argument and
// captures each run's standard output in strwn1 / strwn2.
const string wnExePath = @"C:\Program Files\WordNet\2.1\bin\wn.exe";
string strwn1, strwn2;
Process p = new Process();

#region Call wn.exe for wordnet hypernym
// Start the program directly, with no window, and with its standard output
// redirected so it can be read below
p.StartInfo.CreateNoWindow = true;
p.StartInfo.RedirectStandardOutput = true;
p.StartInfo.UseShellExecute = false;

// word 1 (w1 is a word)
p.StartInfo.FileName = wnExePath;
p.StartInfo.Arguments = w1 + " -hypen";
p.Start();
// The output is read to the end before waiting for the process to exit
strwn1 = p.StandardOutput.ReadToEnd();
p.WaitForExit();

// word 2 (w2 is the other word)
p.StartInfo.FileName = wnExePath;
p.StartInfo.Arguments = w2 + " -hypen";
p.Start();
strwn2 = p.StandardOutput.ReadToEnd();
p.WaitForExit();

#endregion

Disabling Close button on Forms

April 9th, 2008 Phanix

// Used in .NET Windows Forms: disables the form's Close button.
// 0x200 is the class-style bit OR-ed into CreateParams.ClassStyle below
// (presumably Win32 CS_NOCLOSE — confirm against the Window Class Styles docs).
private const int CP_NOCLOSE_BUTTON = 0x200;

/// <summary>
/// Returns the base creation parameters with the <c>CP_NOCLOSE_BUTTON</c> bit
/// added to <c>ClassStyle</c>.
/// </summary>
protected override CreateParams CreateParams
{
        get
        {
                CreateParams myCp = base.CreateParams;
                myCp.ClassStyle |= CP_NOCLOSE_BUTTON;
                return myCp;
        }
}

WordPress 與 MySQL 的 Encoding 大混戰…

March 9th, 2008 Phanix

週五晚上終於順利解決這個煩人的 encoding 問題

Read the rest of this entry / 繼續閱讀 »

M$ SQL Server BCP utility

February 28th, 2008 Phanix

BCP為M$ SQL Server的一個公用程式(utility),主要的功能為將大量儲存在檔案中的資料拷貝進入資料庫表格中,或者將資料庫表格中的資料匯出到檔案中,而這些動作都可以透過command line來完成,所以也可以很方便地透過batch file的編寫來當成scheduled job來使用。

Read the rest of this entry / 繼續閱讀 »

Lucene.NET

February 2nd, 2008 Phanix

之前整理的資料…

Read the rest of this entry / 繼續閱讀 »

C# 進行對 Windows Form 控制項的安全執行緒呼叫

September 3rd, 2007 Phanix

Windows Form 控制項的存取並非原本就採用安全執行緒的方式。當有兩個或多個執行緒管理控制項的狀態,就有可能強制控制項進入不一致的狀態。其他與執行緒有關的錯誤也有可能如此,包括競爭情形和死結。

Read the rest of this entry / 繼續閱讀 »

.NET 見鬼難用的 CheckedListBox

August 29th, 2007 Phanix

一點都不直覺的一個Windows Form Control

Read the rest of this entry / 繼續閱讀 »

.net 讀取 Excel 文字

August 6th, 2007 Phanix

Still in VS 2005 IDE

Read the rest of this entry / 繼續閱讀 »