远程抓取GOOGLE的自动翻译结果
2006-10-27 17:17:03 来源:WEB开发网核心提示: 1 Function RemoveHTML()Function RemoveHTML(strhtml) 2 if strhtml<>"" then 3 dim s_str as string 4 s_str=regex.replace(strHTML,"<[^>
''' <summary>
''' Removes HTML tags and non-breaking-space entities from a string.
''' </summary>
''' <param name="strhtml">Markup to clean; may be Nothing or empty.</param>
''' <returns>
''' The input with every &lt;...&gt; tag removed, or an empty string when the
''' input is Nothing or empty.
''' </returns>
Function RemoveHTML(strhtml)
    Dim html As String = CStr(strhtml)

    ' Return an explicit empty string instead of falling off the end of the
    ' function (which yields Nothing and makes callers fail on .Substring etc.).
    If html Is Nothing OrElse html.Length = 0 Then
        Return ""
    End If

    ' Drop anything that looks like a tag: "<", one or more non-">" chars, ">".
    Dim s_str As String = Regex.Replace(html, "<[^>]+>", "")

    ' NOTE(review): the published listing shows Replace(s_str, " ", ""), which
    ' is presumably an HTML-decoded "&nbsp;" entity. Removing ordinary spaces
    ' would glue the words of a translated sentence together, so only the
    ' entity and the literal U+00A0 character are stripped here. Confirm
    ' against the original, un-rendered source if it is available.
    s_str = s_str.Replace("&nbsp;", "")
    s_str = s_str.Replace(ChrW(160).ToString(), "")

    Return s_str
End Function
9
''' <summary>
''' Posts text to Google's translate_t page and extracts the translated text
''' from the returned HTML.
''' </summary>
''' <param name="texts">The text to translate.</param>
''' <param name="languages">Language pair code, e.g. "zh-CN|en".</param>
''' <returns>The extracted translation with HTML removed.</returns>
Function china_to_english(texts, languages)
    ' URL-encode both values: the language pair contains "|" and the text may
    ' contain "&", "=", "+" or non-ASCII characters, any of which would
    ' otherwise corrupt the form-encoded body.
    Dim payload As String = "hl=zh-CN&ie=UTF8&text=" & HttpUtility.UrlEncode(CStr(texts)) & _
                            "&langpair=" & HttpUtility.UrlEncode(CStr(languages))
    Dim bytes As Byte() = Encoding.UTF8.GetBytes(payload)

    Dim req As WebRequest = WebRequest.Create("http://translate.google.com/translate_t")
    req.Credentials = CredentialCache.DefaultCredentials
    req.Method = "POST"
    req.ContentType = "application/x-www-form-urlencoded"
    req.Timeout = 10000 ' milliseconds
    ' Content-Length is the encoded byte count, not the character count.
    req.ContentLength = bytes.Length

    ' Send the request body; close the stream even if the write fails.
    Dim newStream As Stream = req.GetRequestStream()
    Try
        newStream.Write(bytes, 0, bytes.Length)
    Finally
        newStream.Close()
    End Try

    ' Read the response; Finally guarantees cleanup even when Response.End()
    ' aborts the current request.
    Dim responseFromServer As String = ""
    Dim res As HttpWebResponse = CType(req.GetResponse(), HttpWebResponse)
    Try
        ' Report failure only when the status is NOT 200 OK (the original
        ' comparison was inverted and case-mismatched).
        If res.StatusCode <> HttpStatusCode.OK Then
            Current.Response.Write("暂时无法连接到网站,请换用另一个程序")
            Current.Response.End()
        End If

        ' The page is decoded as gb2312, as in the original listing.
        Dim reader As New StreamReader(res.GetResponseStream(), Encoding.GetEncoding("gb2312"))
        Try
            responseFromServer = reader.ReadToEnd()
        Finally
            reader.Close() ' also closes the underlying response stream
        End Try
    Finally
        res.Close()
    End Try

    ' Keep only the content captured between a <div ...> and </div>.
    Dim ss As String = Regex.Replace(responseFromServer, "(?i:(.+)(\<div)(.+)(\>)(.+)(\<\/div\>)(.+))", "$5")
    ss = RemoveHTML(ss)

    ' Drop the 3-character label that precedes the translation (presumably the
    ' word for "translation" plus a separator - verify against a live
    ' response). Guard the length so a short or empty result does not throw.
    If Not ss Is Nothing AndAlso ss.Length >= 3 Then
        ss = ss.Substring(3)
    End If
    Return ss
End Function
47
48 调用china_to_english(texts,languages)时需要传入两个参数:第一个为待翻译的文字,第二个为翻译方向对应的语言对代码(源语言|目标语言)。
例:中译英时,第二个参数为:zh-CN|en
我导入的命名空间如下:
Imports System
Imports System.Web
Imports System.IO
Imports Microsoft.VisualBasic
Imports System.Web.HttpContext
Imports System.Web.UI
Imports System.Web.UI.WebControls
Imports System.Text
Imports System.Text.RegularExPRessions
Imports System.Net
2 if strhtml<>"" then
3 dim s_str as string
4 s_str=regex.replace(strHTML,"<[^>]+>","")
5 s_str=replace(s_str," ","")
6 return s_str
7 end if
8 End Function
9
''' <summary>
''' Posts text to Google's translate_t page and extracts the translated text
''' from the returned HTML.
''' </summary>
''' <param name="texts">The text to translate.</param>
''' <param name="languages">Language pair code, e.g. "zh-CN|en".</param>
''' <returns>The extracted translation with HTML removed.</returns>
Function china_to_english(texts, languages)
    ' URL-encode both values: the language pair contains "|" and the text may
    ' contain "&", "=", "+" or non-ASCII characters, any of which would
    ' otherwise corrupt the form-encoded body.
    Dim payload As String = "hl=zh-CN&ie=UTF8&text=" & HttpUtility.UrlEncode(CStr(texts)) & _
                            "&langpair=" & HttpUtility.UrlEncode(CStr(languages))
    Dim bytes As Byte() = Encoding.UTF8.GetBytes(payload)

    Dim req As WebRequest = WebRequest.Create("http://translate.google.com/translate_t")
    req.Credentials = CredentialCache.DefaultCredentials
    req.Method = "POST"
    req.ContentType = "application/x-www-form-urlencoded"
    req.Timeout = 10000 ' milliseconds
    ' Content-Length is the encoded byte count, not the character count.
    req.ContentLength = bytes.Length

    ' Send the request body; close the stream even if the write fails.
    Dim newStream As Stream = req.GetRequestStream()
    Try
        newStream.Write(bytes, 0, bytes.Length)
    Finally
        newStream.Close()
    End Try

    ' Read the response; Finally guarantees cleanup even when Response.End()
    ' aborts the current request.
    Dim responseFromServer As String = ""
    Dim res As HttpWebResponse = CType(req.GetResponse(), HttpWebResponse)
    Try
        ' Report failure only when the status is NOT 200 OK (the original
        ' comparison was inverted and case-mismatched).
        If res.StatusCode <> HttpStatusCode.OK Then
            Current.Response.Write("暂时无法连接到网站,请换用另一个程序")
            Current.Response.End()
        End If

        ' The page is decoded as gb2312, as in the original listing.
        Dim reader As New StreamReader(res.GetResponseStream(), Encoding.GetEncoding("gb2312"))
        Try
            responseFromServer = reader.ReadToEnd()
        Finally
            reader.Close() ' also closes the underlying response stream
        End Try
    Finally
        res.Close()
    End Try

    ' Keep only the content captured between a <div ...> and </div>.
    Dim ss As String = Regex.Replace(responseFromServer, "(?i:(.+)(\<div)(.+)(\>)(.+)(\<\/div\>)(.+))", "$5")
    ss = RemoveHTML(ss)

    ' Drop the 3-character label that precedes the translation (presumably the
    ' word for "translation" plus a separator - verify against a live
    ' response). Guard the length so a short or empty result does not throw.
    If Not ss Is Nothing AndAlso ss.Length >= 3 Then
        ss = ss.Substring(3)
    End If
    Return ss
End Function
47
48 调用china_to_english(texts,languages)时需要传入两个参数:第一个为待翻译的文字,第二个为翻译方向对应的语言对代码(源语言|目标语言)。
例:中译英时,第二个参数为:zh-CN|en
我导入的命名空间如下:
Imports System
Imports System.Web
Imports System.IO
Imports Microsoft.VisualBasic
Imports System.Web.HttpContext
Imports System.Web.UI
Imports System.Web.UI.WebControls
Imports System.Text
Imports System.Text.RegularExPRessions
Imports System.Net
- ››Google搜索引擎的奥秘
- ››Google测试搜索结果页面右侧内容更丰富的信息栏
- ››Google Dart精粹:应用构建,快照和隔离体
- ››google的代码审查
- ››google analytics清晰追踪爬虫的爬行信息
- ››Google+中文用户在两千万Google+大军中是少数派
- ››Google AdWords最昂贵点击成本的20种关键词分类
- ››Google运作经理Bryan Power给出的GOOGLE求职意见
- ››Google用户体验的十大设计原则
- ››Google Analytics(分析)能为网站带来什么
- ››Google goggles图片搜索 如何优化一个wap网站
- ››Google Docs将增加iPhone和Android编辑功能
更多精彩
赞助商链接