数据量大可以拆表吗?怎么拆才合理?
表A 5000万
declare @i int
set @i=1
while(@i<=10)
begin
select *
into #tempA
from DAY_SITE_SE_PROVIDER_KEYWORD
where (@i-1)*10000<daypkid and daypkid<=@i*10000
set @i=@i+1
end
我的想法是做循环,每次去一万,这样估计要取5万次,然后呢........
表A 5000万
declare @i int
set @i=1
while(@i<=10)
begin
select *
into #tempA
from DAY_SITE_SE_PROVIDER_KEYWORD
where (@i-1)*10000<daypkid and daypkid<=@i*10000
set @i=@i+1
end
我的想法是做循环,每次去一万,这样估计要取5万次,然后呢........
解决方案 »
- 有一个项目,需要visual C++ 对microsoft sql server 进行数据的管理。 比如说数据的存取。
- 求大于平均工资的员工
- openquery和sybase的难题
- 大家帮忙看看DBCC SHOWCONFIG正常么,count一下超级慢啊,看执行计划也没什么不对的,索引都用上了
- 如何在存储过程定位某一行,如我要看存储第100行是哪个语句,怎么看呢,请各位指教,在线等啊
- 如何获得SQL Server中数据库的大小?
- 关于作业失败的问题
- 如何返回一个值,从而判定SQL查询结果是否为空?
- RECOVERY与NORECOVERY是否这样理解?
- 请各位赐教
- 数据库中有重复的数据.请高手们给写的语句删除.谢谢
- 请教个数据库升级,客户端连接服务器时碰到的问题
将数据bcp出来,存为文本格式的数据文件
用c/basic一个文件拆分工具,将文件拆分为合适的大小
再逐一将拆分好的文件bcp in 相应的表。
USE master
-- 备份
BACKUP DATABASE AdventureWorks
TO DISK = 'AdventureWorks.bak'
WITH FORMAT---- 恢复
--RESTORE DATABASE AdventureWorks
-- FROM DISK = 'AdventureWorks.bak'
-- WITH REPLACE
GO--=========================================
-- 转换为分区表
--=========================================
USE AdventureWorks
GO-- 1. 创建分区函数
-- a. 适用于存储历史存档记录的分区表的分区函数
DECLARE @dt datetime
SET @dt = '20020101'
CREATE PARTITION FUNCTION PF_HistoryArchive(datetime)
AS RANGE RIGHT
FOR VALUES(
@dt,
DATEADD(Year, 1, @dt))-- b. 适用于存储历史记录的分区表的分区函数
--DECLARE @dt datetime
SET @dt = '20030901'
CREATE PARTITION FUNCTION PF_History(datetime)
AS RANGE RIGHT
FOR VALUES(
@dt,
DATEADD(Month, 1, @dt), DATEADD(Month, 2, @dt), DATEADD(Month, 3, @dt),
DATEADD(Month, 4, @dt), DATEADD(Month, 5, @dt), DATEADD(Month, 6, @dt),
DATEADD(Month, 7, @dt), DATEADD(Month, 8, @dt), DATEADD(Month, 9, @dt),
DATEADD(Month, 10, @dt), DATEADD(Month, 11, @dt), DATEADD(Month, 12, @dt))
GO-- 2. 创建分区架构
-- a. 适用于存储历史存档记录的分区表的分区架构
CREATE PARTITION SCHEME PS_HistoryArchive
AS PARTITION PF_HistoryArchive
TO([PRIMARY], [PRIMARY], [PRIMARY])-- b. 适用于存储历史记录的分区表的分区架构
CREATE PARTITION SCHEME PS_History
AS PARTITION PF_History
TO([PRIMARY], [PRIMARY],
[PRIMARY], [PRIMARY], [PRIMARY],
[PRIMARY], [PRIMARY], [PRIMARY],
[PRIMARY], [PRIMARY], [PRIMARY],
[PRIMARY], [PRIMARY], [PRIMARY])
GO-- 3. 删除索引
-- a. 删除存储历史存档记录的表中的索引
DROP INDEX Production.TransactionHistoryArchive.IX_TransactionHistoryArchive_ProductID
DROP INDEX Production.TransactionHistoryArchive.IX_TransactionHistoryArchive_ReferenceOrderID_ReferenceOrderLineID-- b. 删除存储历史记录的表中的索引
DROP INDEX Production.TransactionHistory.IX_TransactionHistory_ProductID
DROP INDEX Production.TransactionHistory.IX_TransactionHistory_ReferenceOrderID_ReferenceOrderLineID
GO-- 4. 转换为分区表
-- a. 将存储历史存档记录的表转换为分区表
ALTER TABLE Production.TransactionHistoryArchive
DROP CONSTRAINT PK_TransactionHistoryArchive_TransactionID
WITH(
MOVE TO PS_HistoryArchive(TransactionDate))-- b.将存储历史记录的表转换为分区表
ALTER TABLE Production.TransactionHistory
DROP CONSTRAINT PK_TransactionHistory_TransactionID
WITH(
MOVE TO PS_History(TransactionDate))
GO-- 5. 恢复主键
-- a. 恢复存储历史存档记录的分区表的主键
ALTER TABLE Production.TransactionHistoryArchive
ADD CONSTRAINT PK_TransactionHistoryArchive_TransactionID
PRIMARY KEY CLUSTERED(
TransactionID,
TransactionDate)-- b. 恢复存储历史记录的分区表的主键
ALTER TABLE Production.TransactionHistory
ADD CONSTRAINT PK_TransactionHistory_TransactionID
PRIMARY KEY CLUSTERED(
TransactionID,
TransactionDate)
GO-- 6. 恢复索引
-- a. 恢复存储历史存档记录的分区表的索引
CREATE INDEX IX_TransactionHistoryArchive_ProductID
ON Production.TransactionHistoryArchive(
ProductID)CREATE INDEX IX_TransactionHistoryArchive_ReferenceOrderID_ReferenceOrderLineID
ON Production.TransactionHistoryArchive(
ReferenceOrderID,
ReferenceOrderLineID)-- b. 恢复存储历史记录的分区表的索引
CREATE INDEX IX_TransactionHistory_ProductID
ON Production.TransactionHistory(
ProductID)CREATE INDEX IX_TransactionHistory_ReferenceOrderID_ReferenceOrderLineID
ON Production.TransactionHistory(
ReferenceOrderID,
ReferenceOrderLineID)
GO-- 7. 查看分区表的相关信息
SELECT
SchemaName = S.name,
TableName = TB.name,
PartitionScheme = PS.name,
PartitionFunction = PF.name,
PartitionFunctionRangeType = CASE
WHEN boundary_value_on_right = 0 THEN 'LEFT'
ELSE 'RIGHT' END,
PartitionFunctionFanout = PF.fanout,
SchemaID = S.schema_id,
ObjectID = TB.object_id,
PartitionSchemeID = PS.data_space_id,
PartitionFunctionID = PS.function_id
FROM sys.schemas S
INNER JOIN sys.tables TB
ON S.schema_id = TB.schema_id
INNER JOIN sys.indexes IDX
on TB.object_id = IDX.object_id
AND IDX.index_id < 2
INNER JOIN sys.partition_schemes PS
ON PS.data_space_id = IDX.data_space_id
INNER JOIN sys.partition_functions PF
ON PS.function_id = PF.function_id
GO--=========================================
-- 移动分区表数据
--=========================================
-- 1. 为存储历史存档记录的分区表增加分区, 并接受从历史记录分区表移动过来的数据
-- a. 修改分区架构, 增加用以接受新分区的文件组
ALTER PARTITION SCHEME PS_HistoryArchive
NEXT USED [PRIMARY]-- b. 修改分区函数, 增加分区用以接受从历史记录分区表移动过来的数据
DECLARE @dt datetime
SET @dt = '20030901'
ALTER PARTITION FUNCTION PF_HistoryArchive()
SPLIT RANGE(@dt)-- c. 将历史记录表中的过期数据移动到历史存档记录表中
ALTER TABLE Production.TransactionHistory
SWITCH PARTITION 2
TO Production.TransactionHistoryArchive PARTITION $PARTITION.PF_HistoryArchive(@dt)-- d. 将接受到的数据与原来的分区合并
ALTER PARTITION FUNCTION PF_HistoryArchive()
MERGE RANGE(@dt)
GO-- 2. 将存储历史记录的分区表中不包含数据的分区删除, 并增加新的分区以接受新数据
-- a. 合并不包含数据的分区
DECLARE @dt datetime
SET @dt = '20030901'
ALTER PARTITION FUNCTION PF_History()
MERGE RANGE(@dt)-- b. 修改分区架构, 增加用以接受新分区的文件组
ALTER PARTITION SCHEME PS_History
NEXT USED [PRIMARY]-- c. 修改分区函数, 增加分区用以接受新数据
SET @dt = '20041001'
ALTER PARTITION FUNCTION PF_History()
SPLIT RANGE(@dt)
GO
-- 清除历史存档记录中的过期数据
--=========================================
-- 1. 创建用于保存过期的历史存档数据的表
CREATE TABLE Production.TransactionHistoryArchive_2001_temp(
TransactionID int NOT NULL,
ProductID int NOT NULL,
ReferenceOrderID int NOT NULL,
ReferenceOrderLineID int NOT NULL
DEFAULT ((0)),
TransactionDate datetime NOT NULL
DEFAULT (GETDATE()),
TransactionType nchar(1) NOT NULL,
Quantity int NOT NULL,
ActualCost money NOT NULL,
ModifiedDate datetime NOT NULL
DEFAULT (GETDATE()),
CONSTRAINT PK_TransactionHistoryArchive_2001_temp_TransactionID
PRIMARY KEY CLUSTERED(
TransactionID,
TransactionDate)
)-- 2. 将数据从历史存档记录分区表移动到第1步创建的表中
ALTER TABLE Production.TransactionHistoryArchive
SWITCH PARTITION 1
TO Production.TransactionHistoryArchive_2001_temp-- 3. 删除不再包含数据的分区
DECLARE @dt datetime
SET @dt = '20020101'
ALTER PARTITION FUNCTION PF_HistoryArchive()
MERGE RANGE(@dt)-- 4. 修改分区架构, 增加用以接受新分区的文件组
ALTER PARTITION SCHEME PS_HistoryArchive
NEXT USED [PRIMARY]-- 5. 修改分区函数, 增加分区用以接受新数据
SET @dt = '20040101'
ALTER PARTITION FUNCTION PF_HistoryArchive()
SPLIT RANGE(@dt)
查询分区信息:;WITH
TBINFO AS(
SELECT
SchemaName = S.name,
TableName = TB.name,
PartitionScheme = PS.name,
PartitionFunction = PF.name,
PartitionFunctionRangeType = CASE
WHEN boundary_value_on_right = 0 THEN 'LEFT'
ELSE 'RIGHT' END,
PartitionFunctionFanout = PF.fanout,
SchemaID = S.schema_id,
ObjectID = TB.object_id,
PartitionSchemeID = PS.data_space_id,
PartitionFunctionID = PS.function_id
FROM sys.schemas S
INNER JOIN sys.tables TB
ON S.schema_id = TB.schema_id
INNER JOIN sys.indexes IDX
on TB.object_id = IDX.object_id
AND IDX.index_id < 2
INNER JOIN sys.partition_schemes PS
ON PS.data_space_id = IDX.data_space_id
INNER JOIN sys.partition_functions PF
ON PS.function_id = PF.function_id
),
PF1 AS(
SELECT
PFP.function_id, PFR.boundary_id, PFR.value,
Type = CONVERT(sysname,
CASE T.name
WHEN 'numeric' THEN 'decimal'
WHEN 'real' THEN 'float'
ELSE T.name END
+ CASE
WHEN T.name IN('decimal', 'numeric')
THEN QUOTENAME(RTRIM(PFP.precision)
+ CASE WHEN PFP.scale > 0 THEN ',' + RTRIM(PFP.scale) ELSE '' END, '()')
WHEN T.name IN('float', 'real')
THEN QUOTENAME(PFP.precision, '()')
WHEN T.name LIKE 'n%char'
THEN QUOTENAME(PFP.max_length / 2, '()')
WHEN T.name LIKE '%char' OR T.name LIKE '%binary'
THEN QUOTENAME(PFP.max_length, '()')
ELSE '' END)
FROM sys.partition_parameters PFP
LEFT JOIN sys.partition_range_values PFR
ON PFR.function_id = PFP.function_id
AND PFR.parameter_id = PFP.parameter_id
INNER JOIN sys.types T
ON PFP.system_type_id = T.system_type_id
),
PF2 AS(
SELECT * FROM PF1
UNION ALL
SELECT
function_id, boundary_id = boundary_id - 1, value, type
FROM PF1
WHERE boundary_id = 1
),
PF AS(
SELECT
B.function_id, boundary_id = ISNULL(B.boundary_id + 1, 1),
value = STUFF(
CASE
WHEN A.boundary_id IS NULL THEN ''
ELSE ' AND [partition_column_name] ' + PF.LessThan + ' ' + CONVERT(varchar(max), A.value) END
+ CASE
WHEN A.boundary_id = 1 THEN ''
ELSE ' AND [partition_column_name] ' + PF.MoreThan + ' ' + CONVERT(varchar(max), B.value) END,
1, 5, ''),
B.Type
FROM PF1 A
RIGHT JOIN PF2 B
ON A.function_id = B.function_id
AND (A.boundary_id - 1 = B.boundary_id
OR(A.boundary_id IS NULL AND B.boundary_id IS NULL))
INNER JOIN(
SELECT
function_id,
LessThan = CASE
WHEN boundary_value_on_right = 0 THEN '<='
ELSE '<' END,
MoreThan = CASE
WHEN boundary_value_on_right = 0 THEN '>'
ELSE '>=' END
FROM sys.partition_functions
)PF
ON B.function_id = PF.function_id
),
PS AS(
SELECT
DDS.partition_scheme_id, DDS.destination_id,
FileGroupName = FG.name, IsReadOnly = FG.is_read_only
FROM sys.destination_data_spaces DDS
INNER JOIN sys.filegroups FG
ON DDS.data_space_id = FG.data_space_id
),
PINFO AS(
SELECT
RowID = ROW_NUMBER() OVER(ORDER BY SchemaID, ObjectID, PS.destination_id),
TB.SchemaName, TB.TableName,
TB.PartitionScheme, PS.destination_id, PS.FileGroupName, PS.IsReadOnly,
TB.PartitionFunction, TB.PartitionFunctionRangeType, TB.PartitionFunctionFanout,
PF.boundary_id, PF.Type, PF.value
FROM TBINFO TB
INNER JOIN PS
ON TB.PartitionSchemeID = PS.partition_scheme_id
LEFT JOIN PF
ON TB.PartitionFunctionID = PF.function_id
AND PS.destination_id = PF.boundary_id
)
SELECT
RowID,
SchemaName = CASE destination_id
WHEN 1 THEN SchemaName
ELSE N'' END,
TableName = CASE destination_id
WHEN 1 THEN TableName
ELSE N'' END,
PartitionScheme = CASE destination_id
WHEN 1 THEN PartitionScheme
ELSE N'' END,
destination_id, FileGroupName, IsReadOnly,
PartitionFunction = CASE destination_id
WHEN 1 THEN PartitionFunction
ELSE N'' END,
PartitionFunctionRangeType = CASE destination_id
WHEN 1 THEN PartitionFunctionRangeType
ELSE N'' END,
PartitionFunctionFanout = CASE destination_id
WHEN 1 THEN CONVERT(varchar(20), PartitionFunctionFanout)
ELSE N'' END,
boundary_id = ISNULL(CONVERT(varchar(20), boundary_id), ''),
Type = ISNULL(Type, N''),
value = CASE PartitionFunctionFanout
WHEN 1 THEN '<ALL Data>'
ELSE ISNULL(value, N'<NEXT USED>') END
FROM PINFO
ORDER BY RowID
--==================================
--drop database dbPartitionTest
--测试数据库
create database dbPartitionTest
go
use
dbPartitionTest
go
--增加分组
alter database dbPartitionTest ADD FILEGROUP P200801
alter database dbPartitionTest ADD FILEGROUP P200802
alter database dbPartitionTest ADD FILEGROUP P200803
go
--分区函数
CREATE PARTITION FUNCTION part_Year(datetime)
AS RANGE LEFT FOR VALUES
(
’20080131 23:59:59.997’,’20080229 23:59:59.997’,’20080331 23:59:59.997’
)
go
--增加文件组
ALTER DATABASE dbPartitionTest ADD FILE (NAME = N’P200801’,FILENAME = N’c:tb_P200801.ndf’,SIZE = 1MB,MAXSIZE = 500MB,FILEGROWTH = 1MB)TO FILEGROUP P200801
ALTER DATABASE dbPartitionTest ADD FILE (NAME = N’P200802’,FILENAME = N’c:tb_P200802.ndf’,SIZE = 1MB,MAXSIZE = 500MB,FILEGROWTH = 1MB)TO FILEGROUP P200802
ALTER DATABASE dbPartitionTest ADD FILE (NAME = N’P200803’,FILENAME = N’c:tb_P200803.ndf’,SIZE = 1MB,MAXSIZE = 500MB,FILEGROWTH = 1MB)TO FILEGROUP P200803
go
--分区架构
CREATE PARTITION SCHEME part_YearScheme AS PARTITION part_Year TO (P200801,P200802,P200803,[PRIMARY])
go
CREATE TABLE [dbo].t_part
(name varchar(100) default newid(),date datetime NOT NULL)
ON part_YearScheme (date)
go
--添加测试数据,每天1条
declare @date datetime
set @date=’2007-12-31’
while @date<=’2008-04-0’
1 begin
insert into t_part(date)values(@date)
set @date=@date+1
end
go
--查询数据分布在哪些分区
select $partition.part_Year(date) as 分区编号,* from t_part order by date
--查询数据库文件
go
sp_helpfile
sqlserver2000
分别创建多个,然后在创建一个大的视图,进行查询
sqlserver2005
这个可以分区
oracle
这个也可以分区这样会好点的
一、从设计角度讲,通过分区表把数据的管理逻辑封装在数据模型层面,而对业务层面不产生影响。
二、如果把数据进行物理分割,有两点不可预料:
1、物理分割是否合理灵活,后期维护是否简单方便?
我们以前有个系统对数据进行了物理分表(年度表),导致两个结果:
1)数据多少不可预料,客户数据逐年递增,如07年的数据是05年的4倍大;
2)后面维护的同事对其苦不堪言,跨年度数据的处理非常麻烦;
3)不利于数据挖掘,物理分表就把一些潜在的规律人为打断,数据的连续感丧失。
2、是否需要程序逻辑对其支持?即如何前端程序才能认为这些数据是个整体?
由于物理分割了表,那么前端业务程序如何获得所需数据,就是一个头疼的问题。
1)建立逻辑视图,提出这种意见的人肯定没有真正处理过大表,比如逻辑数据1千万条,对这样的视图检索除非其主要条件正好过滤了大部分数据,否则检索将十分可怕;
2)如果不能建立逻辑视图,那么前端业务必然要明确知道那些物理表的设置,那么支持动态适应物理表的多少又是一个陷阱;
3)检索还好说,更新呢?需要更新的数据可能分布在不同的物理表中,逻辑又变得复杂了。
3、由于前面1和2的问题,导致基于分表设计的应用系统,一般或多或少的会对功能有一些阉割,换句话说就是实现得总是那么欠缺完美。基于上面理由,建议使用分区表的思路设计大数据表,与之相匹配的还有诸如索引分区、二次分区等等思路,通过这些可以进一步提升数据的容积,也进一步提高产品形象。其他的所有方式都需要通过补充数据库端代码逻辑来表示所
这位仁兄的意见不错,上面有位兄弟直接就拿出个分区表的方案出来,
如果是sql 2000或之前的数据库怎么办?呵呵...