准备:
1.perl编译环境
2.Excel模板
3.Perl脚本
安装Perl
首先,安装Perl编译环境,大部分人使用的是Windows环境,到www.perl.org下载对应版本即可,推介下载ActivePerl。然后傻瓜式安装,默认选项,不停点击“下一步”即可,环境变量也会自动添加不用管,程序会默认安装到C:\Perl\bin下。
生成SQL语句的Perl脚本
脚本如下:
注意一点,脚本里 ${AUTO_HOME} 需要配置环境变量,指定一个路径,这个路径会存放生成好的SQL文件,当然,写死也是可以的,要注意反斜线转义,比如写成 my ${AUTO_EXCEL}= "D:\\ETL\\excel";
脚本里拼接的SQL是根据Teradate数据库的语法规则,其他数据库做相应调整即可。
1 #!usr\bin\perl 2 use strict; 3 use Win32::OLE qw(in with); 4 use Win32::OLE::Const \'Microsoft.Excel\'; 5 #use Win32::API; 6 7 8 9 10 my ${excelFile} = $ARGV[0]; #Y# 字段级excel表格,比如:SE0_filedTask.xls 11 my ${TXDATE} = $ARGV[1]; #Y# 数据日期,比如:20190920 12 my ${MMDD} = substr(${TXDATE} , 4 , 4); 13 14 if ($#ARGV ne 1){ 15 print "\n==========================================================\n"; 16 print "perl AutoReadExcelToDDL.pl SM2.xls 20201016"; 17 print "\n==========================================================\n"; 18 exit(-1); 19 } 20 21 my ${datawareTable} =\'\'; 22 my ${sourceTable} =\'\'; 23 my ${datawareTableChineseName} =\'\'; 24 my ${fieldName} =\'\'; 25 my ${fieldType} =\'\'; 26 my ${fieldChineseName} =\'\'; 27 my ${fieldFormat} =\'\'; 28 my ${PPI} =\'\'; 29 30 my ${AUTO_HOME} = $ENV{"AUTO_HOME"}; 31 my ${AUTO_EXCEL}= "${AUTO_HOME}/excel"; #注意AUTO_HOME要自己在环境变量里面设置 32 my ${table_list} = ""; 33 34 35 my $ret = main(); 36 37 exit($ret); 38 39 sub main() 40 { 41 getExcelToDDL(); 42 43 44 return 0; 45 } 46 47 sub getExcelToDDL 48 { 49 my $datawareTableChineseName_bak = \'\'; 50 my $datawareTable_bak = \'\'; 51 my $sourceTable_bak = \'\'; 52 53 my $selSQL = \'\'; #Y#此变量用来存储查询数据的SQL(只查前20条) 54 my $ppiSQL = \'\'; #Y#此变量用来存储统计主键重复的SQL 55 56 my $ddl = ""; 57 my $testInfo = ""; #Y#存储’源系统表名 数据平台源表主干名 源表中文名‘,会打印在SEL.sql文档最前面 58 59 my @ppi = (); 60 my $num = 1; 61 my $count_primary_key = 0; 62 63 my $myExcel=Win32::OLE->new(\'Excel.Application\',sub {$_[0]->QUIT})||Win32::OLE->GetActiveObject(\'Excel.Application\')||die "can\'t install the Excel.Application"; 64 my $myBook=$myExcel->workBooks->Open("$excelFile"); 65 my $mySheet=$myBook->workSheets(1); 66 67 my ${RowCount}=$mySheet->{UsedRange}->{Rows}->{Count};#获取EXCEL中有用的行数 68 69 my $temp = \'\'; 70 my $row = 2; 71 72 while(1) 73 { 74 $row++; 75 76 $temp = $mySheet->Cells($row,1)->{Value}; 77 $temp =~s/[\x00-\x20|\x7F]//g; 78 ${datawareTable} = "$temp"; #Y# 对应Excel第1列的数据”数据平台源表主干名“ 79 80 $temp = $mySheet->Cells($row,5)->{Value}; 81 $temp =~s/[\x00-\x20|x7F]//g; 82 ${sourceTable} = "$temp"; #Y# 对应Excel第5列的数据”源系统表名“ 83 84 $temp = $mySheet->Cells($row,6)->{Value}; 85 $temp =~s/[\x00-\x20|x7F]//g; 86 ${datawareTableChineseName} = "$temp"; #Y# 对应Excel第6列的数据”源表中文名“ 87 88 $temp = $mySheet->Cells($row,2)->{Value}; 89 $temp =~s/[\x00-\x20|x7F]//g; 90 ${fieldName} = "$temp"; #Y# 对应Excel第2列的数据“数据平台源字段名” 91 92 $temp = $mySheet->Cells($row,13)->{Value}; 93 $temp =~s/[\x00-\x20|x7F]//g; 94 ${fieldType} = "$temp"; #Y# 对应Excel第13列的数据“数据平台数据类型” 95 96 $temp = $mySheet->Cells($row,8)->{Value}; 97 $temp =~s/[\x00-\x20|x7F]//g; 98 ${fieldChineseName} = "$temp"; #Y# 对应Excel第8列的数据“源字段中文名” 99 100 $temp = $mySheet->Cells($row,14)->{Value}; 101 $temp =~s/[\x00-\x20|x7F]//g; 102 ${fieldFormat} = "$temp"; #Y# 对应Excel第14列的数据“数据平台数据类型补充内容” 103 104 $temp = $mySheet->Cells($row,15)->{Value}; 105 $temp =~s/[\x00-\x20|x7F]//g; 106 ${PPI} = "$temp"; #Y# 对应Excel第15列的数据“唯一索引或主键字段[UI1,UI2/空]” 107 108 if($ddl eq "") 109 { 110 $ddl = $ddl.".LOGON 192.168.1.28/dbc,dbc;\n"; 111 $ddl = $ddl."DATABASE petl;\n"; 112 113 $ddl = $ddl."--$num.${datawareTableChineseName}\n";$num++; 114 $datawareTableChineseName_bak = ${datawareTableChineseName}; 115 $datawareTable_bak = ${datawareTable}; 116 $sourceTable_bak = ${sourceTable}; 117 118 119 $testInfo = $testInfo."$sourceTable"."\t$datawareTable"."\t$datawareTableChineseName\n"; 120 121 $ddl = $ddl."DROP TABLE ${datawareTable};\n"; 122 $ddl = $ddl."CREATE MULTISET TABLE ${datawareTable}\n"; 123 $ddl = $ddl."(\n"; 124 125 $ddl = $ddl."${fieldName} ${fieldType} ${fieldFormat} TITLE \\'${fieldChineseName}\\'\n"; 126 if (${PPI}=~m/^UI1|Y|PK$/i){unshift @ppi , ${fieldName};} 127 128 next; 129 } 130 131 #if(${datawareTableChineseName} ne $datawareTableChineseName_bak) 132 if(${datawareTable} ne $datawareTable_bak) 133 { 134 $selSQL = $selSQL."SELECT * FROM sdb.$datawareTable_bak${MMDD} sample 20;\n"; 135 $ppiSQL = $ppiSQL."SELECT COUNT(*) FROM sdb.$datawareTable_bak${MMDD} GROUP BY ".join(",", @ppi)." HAVING COUNT(*)<>1;\n"; 136 137 #if (${datawareTableChineseName} eq \'\'){last;} 138 if(${datawareTable} eq \'\'){last;} 139 140 $ddl = $ddl.")\n"; 141 #$ddl = $ddl."PRIMARY INDEX(".join(",", @ppi).");\n"; 142 $count_primary_key = @ppi; #Y#统计主索引的数量 143 if($count_primary_key eq 0) 144 { 145 $ddl = $ddl.";\n"; 146 } 147 else 148 { 149 $ddl = $ddl."PRIMARY INDEX(".join(",", @ppi).");\n"; 150 } 151 $ddl = $ddl."COMMENT ON TABLE $datawareTable_bak IS \\'$datawareTableChineseName_bak\\';\n\n"; 152 $testInfo = $testInfo."$sourceTable"."\t$datawareTable"."\t$datawareTableChineseName\n"; 153 154 @ppi = (); 155 156 $datawareTableChineseName_bak = ${datawareTableChineseName}; 157 $datawareTable_bak = ${datawareTable}; 158 $sourceTable_bak = ${sourceTable}; 159 160 $ddl = $ddl."--$num.${datawareTableChineseName}\n";$num++; 161 $ddl = $ddl."DROP TABLE ${datawareTable};\n"; 162 $ddl = $ddl."CREATE MULTISET TABLE ${datawareTable}\n"; 163 $ddl = $ddl."(\n"; 164 165 $ddl = $ddl."${fieldName} ${fieldType} ${fieldFormat} TITLE \\'${fieldChineseName}\\'\n"; 166 if(${PPI}=~m/^UI1|Y|PK$/i){unshift @ppi , ${fieldName};} 167 168 next; 169 } 170 171 $ddl = $ddl.",${fieldName} ${fieldType} ${fieldFormat} TITLE \\'${fieldChineseName}\\'\n"; 172 if(${PPI}=~m/^UI1|Y|PK$/i){unshift @ppi , ${fieldName};} 173 } #Y# 跳出点 174 175 $ddl = $ddl.")\n"; 176 #$ddl = $ddl."PRIMARY INDEX(".join("," , @ppi).");\n"; 177 $count_primary_key = @ppi; 178 if($count_primary_key eq 0) 179 { 180 $ddl = $ddl.";\n"; 181 } 182 else 183 { 184 $ddl = $ddl."PRIMARY INDEX(".join(",", @ppi).");\n"; 185 } 186 $ddl = $ddl."COMMENT ON TABLE $datawareTable_bak IS \\'$datawareTableChineseName_bak\\';\n\n"; 187 #$testInfo = $testInfo."$sourceTable"."\t$datawareTable"."\t$datawareTableChineseName_bak\n"; 188 @ppi = (); 189 190 $ddl = $ddl."\n"; 191 $ddl = $ddl.".LOGOFF;\n"; 192 $ddl = $ddl.".QUIT 0;\n"; 193 194 undef $myBook; 195 undef $myExcel; 196 197 $excelFile=~m/\\(?<ddlFile>[^\\]*)$/; 198 199 if(!open(WF , ">${AUTO_EXCEL}\\$+{ddlFile}.sql")) 200 { 201 print "can not open ${AUTO_EXCEL}\\$+{ddlFile}.sql\n"; 202 exit(1); 203 } 204 print WF "$ddl"; 205 close(WF); 206 207 if(!open(WFSQL , ">${AUTO_EXCEL}\\$+{ddlFile}SEL.sql")) 208 { 209 print "can not open ${AUTO_EXCEL}\\$+{ddlFile}SEL.sql\n"; 210 exit(1); 211 } 212 print WFSQL $testInfo."\n\n\n".$selSQL."\n".$ppiSQL; 213 close(WFSQL); 214 215 system("bteq<${AUTO_EXCEL}\\$+{ddlFile}.sql>${AUTO_EXCEL}\\$+{ddlFile}.log"); 216 217 return 0; 218 }
Excel模板
Excel模板里记录的是各个接口的字段名、字段类型、主键等信息,脚本也是根据模板来编写的,因此模板和脚本是紧密相关的,不可随意更改,否则脚本也要做出相应的调整。
模板如下
执行脚本
D:\ETL\PerlScript>perl D:\ETL\PerlScript\AutoReadExcelToDDL.pl D:\ETL\excel\FieldTask.xls
执行后就生成了一个SQL文件:
D:\ETL\excel\SI3_filedTask_11.xlsx.sql
打开文件,查看到的建表SQL如下所示,复制到客户端执行即可:
结束。
请发表评论