<?xml version="1.0" encoding="gb2312" standalone="no" ?>
<!DOCTYPE utterance (View Source for full doctype...)>
- <utterance total_sample="124907">
<sil end_sample="8731" /> //这个应作为第一个start_sample
- <sentence>
- <prosodic_phrase>
- <prosodic_word>
<syllable ch_char="为" pinyin="wei4" end_sample="14212" />
<sil end_sample="14318" />
<syllable ch_char="临" pinyin="lin2" end_sample="18632" />
<sil end_sample="18703" />
<syllable ch_char="帖" pinyin="tie4" end_sample="25740" />
</prosodic_word>
</prosodic_phrase>
- <prosodic_phrase>
- <prosodic_word>
<sil end_sample="30319" />
<syllable ch_char="他" pinyin="ta1" end_sample="33343" />
<sil end_sample="33414" />
<syllable ch_char="还" pinyin="hai2" end_sample="37391" bad="yes" />
</prosodic_word>
- <prosodic_word>
<sil end_sample="37462" />
<syllable ch_char="远" pinyin="yuan3" end_sample="41617" />
<sil end_sample="41741" />
<syllable ch_char="游" pinyin="you2" end_sample="47168" />
</prosodic_word>
- <prosodic_word>
<sil end_sample="47232" />
<syllable ch_char="西" pinyin="xi1" end_sample="50740" />
<sil end_sample="50775" />
<syllable ch_char="安" pinyin="an1" end_sample="54736" bad="yes" />
</prosodic_word>
- <prosodic_word>
<sil end_sample="55184" />
<syllable ch_char="碑" pinyin="bei1" end_sample="58192" />
<sil end_sample="58239" />
<syllable ch_char="林" pinyin="lin2" end_sample="65393" />
</prosodic_word>
</prosodic_phrase>
- <prosodic_phrase>
- <prosodic_word>
<sil end_sample="69120" />
<syllable ch_char="龙" pinyin="long2" end_sample="73037" />
<sil end_sample="73081" />
<syllable ch_char="门" pinyin="men2" end_sample="77216" />
</prosodic_word>
- <prosodic_word>
<sil end_sample="77291" />
<syllable ch_char="石" pinyin="shi2" end_sample="81843" />
<sil end_sample="82011" />
<syllable ch_char="窟" pinyin="ku1" end_sample="87964" />
</prosodic_word>
</prosodic_phrase>
- <prosodic_phrase>
- <prosodic_word>
<sil end_sample="92561" />
<syllable ch_char="泰" pinyin="tai4" end_sample="97200" />
<sil end_sample="97313" />
<syllable ch_char="山" pinyin="shan1" end_sample="102455" />
</prosodic_word>
- <prosodic_word>
<sil end_sample="102508" />
<syllable ch_char="摩" pinyin="mo2" end_sample="106749" bad="yes" />
<sil end_sample="106820" />
<syllable ch_char="崖" pinyin="ya2" end_sample="110016" />
</prosodic_word>
- <prosodic_word>
<sil end_sample="110159" />
<syllable ch_char="石" pinyin="shi2" end_sample="114153" />
<sil end_sample="114416" />
<syllable ch_char="刻" pinyin="ke4" end_sample="119772" />
</prosodic_word>
</prosodic_phrase>
<sil end_sample="124907" /> //这个不必存入表格
</sentence>
</utterance>
这是其中一个文件,查询的代码如下:
DECLARE @idoc int
DECLARE @doc varchar(8000)--从文件中读出XML内容到临时表(假设xml文件为c:\a.xml)
create table #tb (doc varchar(8000))
bulk insert #tb from 'F:\corpus samples\sen0001.xml'--将读取到的数据保存到变量中
set @doc=''
select @doc=@doc+doc from #tb--删除临时表
drop table #tb--获取数据
EXEC sp_xml_preparedocument @idoc OUTPUT, @docSELECT * FROM OPENXML (@idoc, '/utterance/sentence/prosodic_phrase/prosodic_word/syllable',2)
WITH (ch_char char(4) '@ch_char',
pinyin char(10) '@pinyin',
start_sample int '../sil/@end_sample',
end_sample int '@end_sample')
EXEC sp_xml_removedocument @idoc
结果不是很对,没有从第一个开始依次把.../sil/@end_sample作为start_sample输出,是由于所处的位置不一致导致的吧,应该怎么改呢?
<!DOCTYPE utterance (View Source for full doctype...)>
- <utterance total_sample="124907">
<sil end_sample="8731" /> //这个应作为第一个start_sample
- <sentence>
- <prosodic_phrase>
- <prosodic_word>
<syllable ch_char="为" pinyin="wei4" end_sample="14212" />
<sil end_sample="14318" />
<syllable ch_char="临" pinyin="lin2" end_sample="18632" />
<sil end_sample="18703" />
<syllable ch_char="帖" pinyin="tie4" end_sample="25740" />
</prosodic_word>
</prosodic_phrase>
- <prosodic_phrase>
- <prosodic_word>
<sil end_sample="30319" />
<syllable ch_char="他" pinyin="ta1" end_sample="33343" />
<sil end_sample="33414" />
<syllable ch_char="还" pinyin="hai2" end_sample="37391" bad="yes" />
</prosodic_word>
- <prosodic_word>
<sil end_sample="37462" />
<syllable ch_char="远" pinyin="yuan3" end_sample="41617" />
<sil end_sample="41741" />
<syllable ch_char="游" pinyin="you2" end_sample="47168" />
</prosodic_word>
- <prosodic_word>
<sil end_sample="47232" />
<syllable ch_char="西" pinyin="xi1" end_sample="50740" />
<sil end_sample="50775" />
<syllable ch_char="安" pinyin="an1" end_sample="54736" bad="yes" />
</prosodic_word>
- <prosodic_word>
<sil end_sample="55184" />
<syllable ch_char="碑" pinyin="bei1" end_sample="58192" />
<sil end_sample="58239" />
<syllable ch_char="林" pinyin="lin2" end_sample="65393" />
</prosodic_word>
</prosodic_phrase>
- <prosodic_phrase>
- <prosodic_word>
<sil end_sample="69120" />
<syllable ch_char="龙" pinyin="long2" end_sample="73037" />
<sil end_sample="73081" />
<syllable ch_char="门" pinyin="men2" end_sample="77216" />
</prosodic_word>
- <prosodic_word>
<sil end_sample="77291" />
<syllable ch_char="石" pinyin="shi2" end_sample="81843" />
<sil end_sample="82011" />
<syllable ch_char="窟" pinyin="ku1" end_sample="87964" />
</prosodic_word>
</prosodic_phrase>
- <prosodic_phrase>
- <prosodic_word>
<sil end_sample="92561" />
<syllable ch_char="泰" pinyin="tai4" end_sample="97200" />
<sil end_sample="97313" />
<syllable ch_char="山" pinyin="shan1" end_sample="102455" />
</prosodic_word>
- <prosodic_word>
<sil end_sample="102508" />
<syllable ch_char="摩" pinyin="mo2" end_sample="106749" bad="yes" />
<sil end_sample="106820" />
<syllable ch_char="崖" pinyin="ya2" end_sample="110016" />
</prosodic_word>
- <prosodic_word>
<sil end_sample="110159" />
<syllable ch_char="石" pinyin="shi2" end_sample="114153" />
<sil end_sample="114416" />
<syllable ch_char="刻" pinyin="ke4" end_sample="119772" />
</prosodic_word>
</prosodic_phrase>
<sil end_sample="124907" /> //这个不必存入表格
</sentence>
</utterance>
这是其中一个文件,查询的代码如下:
DECLARE @idoc int
DECLARE @doc varchar(8000)--从文件中读出XML内容到临时表(假设xml文件为c:\a.xml)
create table #tb (doc varchar(8000))
bulk insert #tb from 'F:\corpus samples\sen0001.xml'--将读取到的数据保存到变量中
set @doc=''
select @doc=@doc+doc from #tb--删除临时表
drop table #tb--获取数据
EXEC sp_xml_preparedocument @idoc OUTPUT, @docSELECT * FROM OPENXML (@idoc, '/utterance/sentence/prosodic_phrase/prosodic_word/syllable',2)
WITH (ch_char char(4) '@ch_char',
pinyin char(10) '@pinyin',
start_sample int '../sil/@end_sample',
end_sample int '@end_sample')
EXEC sp_xml_removedocument @idoc
结果不是很对,没有从第一个开始依次把.../sil/@end_sample作为start_sample输出,是由于所处的位置不一致导致的吧,应该怎么改呢?
select a.Cdt,b.CustSno,b.Pno,a.SnoId,a.WC,b.PdLine,a.Defect,a.Cause,[USER].dbo.get_xml(a.Re,'REMARK') as Re,rtrim([USER].dbo.get_xml(a.Re,'Action')) as Action,[USER].dbo.get_xml(a.Re,'4M') as FourM,[USER].dbo.get_xml(a.Re,'TS') as Status,a.Editor,a.Mark
因为它的dtd十足外部定义的,你删掉第二行就可以了。
to jfei793(fisher) :
能不能讲得具体点啊?那些东西我都没弄过呢
DECLARE @idoc int,@doc varchar(8000)--从文件中读出XML内容到临时表(F:\corpus samples\sen0001.xml)
create table #tb (doc varchar(8000))
bulk insert #tb from 'F:\corpus samples\sen0001.xml'--将读取到的数据保存到变量中
set @doc=''
select @doc=@doc+doc from #tb--删除临时表
drop table #tb--获取数据
EXEC sp_xml_preparedocument @idoc OUTPUT, @doc--创建测试的表
create table syllables
(ch_char char(4),pinyin char(10),start_sample int,end_sample int)--下面是数据处理
insert into syllables(ch_char,pinyin,start_sample,end_sample)SELECT * FROM OPENXML (@idoc, '/utterance/sentence/prosodic_phrase/prosodic_word/syllable',2)
WITH (ch_char char(4) '@ch_char',
pinyin char(10) '@pinyin',
start_sample int '../sil/@end_sample',
end_sample int '@end_sample')
EXEC sp_xml_removedocument @idoc
--显示处理结果
select * from syllables得到如下结果
char pinyin start_sample end_sample
为 wei4 14318 14212
临 lin2 14318 18632
帖 tie4 14318 25740
他 ta1 30319 33343
还 hai2 30319 37391
远 yuan3 37462 41617
游 you2 37462 47168
西 xi1 47232 50740
安 an1 47232 54736
碑 bei1 55184 58192
林 lin2 55184 65393
龙 long2 69120 73037
门 men2 69120 77216
石 shi2 77291 81843
窟 ku1 77291 87964
泰 tai4 92561 97200
山 shan1 92561 102455
摩 mo2 102508 106749
崖 ya2 102508 110016
石 shi2 110159 114153
刻 ke4 110159 119772
其中start_sample出错了,应该从8731开始不重复地显示,不知道程序该如何改动?