# Text and figures taken from:
# https://blog.csdn.net/love130401/article/details/20800003
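# Extract the mRNA and miRNA expression values from a SOFT file.
# Alternatively, run getgeo directly on the command line:
#   getgeo --file=GSE16441_family.soft
# [If the SOFT data were obtained on more than one platform, run this once
#  per platform.]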
use strict;
use warnings;

use Microarray::GEO::SOFT;
use Cwd;

# Purpose: read a GSE family SOFT file, take the data set measured on the
# first platform (GPL) it lists, map probe ids to gene symbols and save the
# resulting matrix to disk.

# [SOFT data input] Local SOFT file to read.
my $soft_file = "GSE16441_family.soft";

# Index of the platform to process. A GSE can contain more than one GPL
# (one GPL measures one kind of data; mRNA and miRNA are not measured on the
# same platform), so each platform is handled in its own run.
my $platform_index = 0;

# Initialize the reader directly on the local file. The argument-less
# constructor call that used to precede this was overwritten immediately and
# has been dropped.
my $soft = Microarray::GEO::SOFT->new(file => $soft_file);

# Parse. It returns a Microarray::GEO::SOFT::GDS, Microarray::GEO::SOFT::GSE
# or Microarray::GEO::SOFT::GPL object, according to the type of the GEO ID.
my $data = $soft->parse;

# Since a GSE can contain more than one GPL, get the GPL list of the GSE.
# [The platforms are stored in $gpl_list.]
my $gpl_list = $data->list("GPL");

# Merge the samples that belong to the same GPL into one data set each.
my $gds_list = $data->merge;

# If the GSE has only one platform, the merged data set is the first entry of
# $gds_list and the platform is the first entry of $gpl_list.
# NOTE(review): this relies on both lists being in the same platform order --
# confirm against the module documentation.
# [Process the data of the first platform.]
my $gds = $gds_list->[$platform_index];
my $gpl = $gpl_list->[$platform_index];
die "No data set/platform at index $platform_index in $soft_file\n"
    unless defined $gds && defined $gpl;

# GPL data contains different mappings of genes or probes, so the probe ids
# can be transformed to gene symbols. This returns a Microarray::ExprSet
# object. The second argument depends on the name of the column that holds
# the gene symbol in the SOFT file.
my $expr_set = $gds->id_convert($gpl, 'GENE_SYMBOL');

# Some simple post-processing:
# eliminate the blank lines ...
$expr_set->remove_empty_features;

# ... and make all symbols unique.
$expr_set->unique_features;

# Write out the resulting matrix.
# [Data output: set the name of the output file here.]
$expr_set->save('kidney-mRNA-XXXX');
# [Likewise, with small modifications: process the data of the next platform
#  (GPL) contained in the same SOFT file.]
use Microarray::GEO::SOFT;
use Cwd;

# Purpose: read the same GSE family SOFT file, take the data set measured on
# the second platform (GPL) it lists, map probe ids to miRNA ids and save the
# resulting matrix to disk.
#
# Everything lives in its own lexical scope so that this section can sit in
# the same file as another run of the same recipe without re-declaring that
# run's `my` variables.
{
    use strict;
    use warnings;

    # [SOFT data input] Local SOFT file to read.
    my $soft_file = "GSE16441_family.soft";

    # Index of the platform to process: 1 selects the second GPL of the GSE.
    # NOTE(review): presumably this is the miRNA platform of this series --
    # verify against the SOFT file.
    my $platform_index = 1;

    # Initialize the reader directly on the local file. The argument-less
    # constructor call that used to precede this was overwritten immediately
    # and has been dropped.
    my $soft = Microarray::GEO::SOFT->new(file => $soft_file);

    # Parse. It returns a Microarray::GEO::SOFT::GDS,
    # Microarray::GEO::SOFT::GSE or Microarray::GEO::SOFT::GPL object,
    # according to the type of the GEO ID.
    my $data = $soft->parse;

    # Since a GSE can contain more than one GPL, get the GPL list of the GSE.
    my $gpl_list = $data->list("GPL");

    # Merge the samples that belong to the same GPL into one data set each.
    my $gds_list = $data->merge;

    # Pick the merged data set and the platform at the chosen index.
    # NOTE(review): this relies on both lists being in the same platform
    # order -- confirm against the module documentation.
    my $gds = $gds_list->[$platform_index];
    my $gpl = $gpl_list->[$platform_index];

    # A GSE with a single platform has no entry at index 1; fail with a clear
    # message instead of a method call on an undefined value.
    die "No data set/platform at index $platform_index in $soft_file\n"
        unless defined $gds && defined $gpl;

    # GPL data contains different mappings of genes or probes, so the probe
    # ids can be transformed to another identifier. This returns a
    # Microarray::ExprSet object. The second argument depends on the name of
    # the column that holds the wanted identifier in the SOFT file.
    my $expr_set = $gds->id_convert($gpl, 'miRNA_ID');

    # Some simple post-processing:
    # eliminate the blank lines ...
    $expr_set->remove_empty_features;

    # ... and make all symbols unique.
    $expr_set->unique_features;

    # Write out the resulting matrix.
    $expr_set->save('kidney-miRNA-XXXX');
}