加载中…
个人资料
  • 博客等级:
  • 博客积分:
  • 博客访问:
  • 关注人气:
  • 获赠金笔:0支
  • 赠出金笔:0支
  • 荣誉徽章:
正文 字体大小:

面板数据分位数回归示例

(2015-01-11 23:48:22)
标签:

面板

分位数

回归

r

分类: 15R语言
编者按:面板数据的分位数 回归已经不是什么新鲜玩意儿了,但是 这里还是 要总结一下,就是R中quantreg的rq.fit.sfn。但是这个程序只运行出coeff,而t值和p值都没有,可以使用bootstrap计算(使用block bootstrap即可)。不知道 rqpd包 怎么样,但是我的计算机不是64位,用不了。这里只能总结一下原始的方法了。
包 install.packages("rqpd") 64位计算机安装才行。





rm(list = ls())

############################## 
rq.fit.panel<-function(X,y,s,w=1,taus=0.8,lambda = 1){ 
  # prototype function for panel data fitting of QR models 
  # the matrix X is assumed to contain an intercept 
  # the vector s is a strata indicator assumed (so far) to be a one-way layout 
  # NB:   
  # 1.  The value of the shrinkage parameter lambda is an open research problem in 
  #       the simplest homogneous settings it should be the ratio of the scale parameters 
  #       of the fixed effects and the idiocyncratic errors 
  # 2.  On return the coefficient vector has m*p + n elements where m is the number 
  #       quantiles being estimated, p is the number of colums of X, and n is the 
  #       number of distinct values of s.  The first m*p coefficients are the  
  #       slope estimates, and the last n are the "fixed effects" 
  # 3.  Like all shrinkage (regularization) estimators, asymptotic inference is somewhat 
  #       problematic... so the bootstrap is the natural first resort. 
  
  
  require(SparseM) 
  require(quantreg) 
  K <- length(w) 
  if(K != length(taus)) 
    stop("length of w and taus must match") 
  X <- as.matrix(X) 
  p <- ncol(X) 
  n <- length(levels(as.factor(s))) 
  N <- length(y) 
  if(N != length(s) || N != nrow(X)) 
    stop("dimensions of y,X,s must match") 
  Z <- as.matrix.csr(model.matrix(~as.factor(s)-1)) 
  Fidelity <- cbind(as(w,"matrix.diag.csr") %x% X,w %x% Z) 
  Penalty <- cbind(as.matrix.csr(0,n,K*p),lambda*as(n,"matrix.diag.csr")) 
  D <- rbind(Fidelity,Penalty) 
  y <- c(w %x% y,rep(0,n)) 
  a <- c((w*(1-taus)) %x% (t(X)%*%rep(1,N)), 
         sum(w*(1-taus)) * (t(Z) %*% rep(1,N)) + lambda * rep(1,n)) 
  rq.fit.sfn(D,y,rhs=a) 


m <- 3
n <- 10
s <- rep(1:n,rep(m,n))
x <- exp(rnorm(n*m))
X <- cbind(1,x)
u <- x*rnorm(m*n) + (1-x)*rf(m*n,3,3)
a <- rep(rnorm(n),rep(m,n))
y <- a + u


#########rq.fit.panel 
fit.panel <- rq.fit.panel(X,y,s) 
fit.panel 
#


######block bootstrap
n1=length(y)  #30
b=1000     #Number of bootstrap samples
ncoef=12
boot_bhat=matrix(NA,b, ncoef)
block_length = 10  
num_blocks = n1/block_length    #n/block_length  
Indices = seq(1:n1)  # All of the indices from 1 to n
Indices = matrix(Indices,block_length,num_blocks)
for (i in 1:b){          #Number of bootstrap samples
  randblock =sample(seq(1:num_blocks),num_blocks,replace = TRUE) # Choose which blocks to use
  Ind_sim = Indices[,randblock]       #Find which data are in each block  
  Ind_sim = c(Ind_sim)       
  Xsim = X[Ind_sim,1:dim(X)[2]]      #Construct the x data
  Ysim = y[Ind_sim]          #Construct the y data
  boot_bhat[i,] = rq.fit.panel(Xsim,Ysim,s)$coef
}
bhat=colMeans(boot_bhat)
#bhat

cov=cov(boot_bhat)
serr = sqrt(diag(cov))
#serr    

p <- length(bhat)
rdf <- n1 - p
vnames<- dimnames(x)[[2]]
coef <- array(bhat, c(p, 4))
dimnames(coef) <- list(vnames, c("Value", "Std. Error", "t value","Pr(>|t|)"))
coef[, 2] <- serr
coef[, 3] <- coef[, 1]/coef[, 2]
coef[, 4] <- if (rdf > 0) 2 * (1 - pt(abs(coef[, 3]), rdf))
coef





我国教育投入对经济增长贡献率的实证研究

——基于面板分位数回归模型

 

湖南大学  吴之亮、游万海、刘炼




附录

R软件程序脚本:

rq.fit.panel<-function(X,y,s,w=c(.1,.25,.5,.25,.1),taus=c(0.1,0.25,0.5,0.75,0.9),lambda = 1){

# prototype function for panel data fitting of QR models

# the matrix X is assumed to contain an intercept

# the vector s is a strata indicator assumed (so far) to be a one-way layout

# NB: 

# 1.  The value of the shrinkage parameter lambda is an open research problem in

      the simplest homogneous settings it should be the ratio of the scale parameters

      of the fixed effects and the idiocyncratic errors

# 2.  On return the coefficient vector has m*p + n elements where m is the number

      quantiles being estimated, p is the number of colums of X, and n is the

      number of distinct values of s.  The first m*p coefficients are the

      slope estimates, and the last n are the "fixed effects"

# 3.  Like all shrinkage (regularization) estimators, asymptotic inference is somewhat

      problematic... so the bootstrap is the natural first resort.

 

 

        require(SparseM)

        require(quantreg)

        K <- length(w)

        if(K != length(taus))

                stop("length of w and taus must match")

        X <- as.matrix(X)

        p <- ncol(X)

        n <- length(levels(as.factor(s)))

        N <- length(y)

        if(N != length(s) || N != nrow(X))

                stop("dimensions of y,X,s must match")

        Z <- as.matrix.csr(model.matrix(~as.factor(s)-1))

        Fidelity <- cbind(as(w,"matrix.diag.csr") %x% X,w %x% Z)

        Penalty <- cbind(as.matrix.csr(0,n,K*p),lambda*as(n,"matrix.diag.csr"))

        D <- rbind(Fidelity,Penalty)

        y <- c(w %x% y,rep(0,n))

        a <- c((w*(1-taus)) %x% (t(X)%*%rep(1,N)),

                sum(w*(1-taus)) * (t(Z) %*% rep(1,N)) + lambda * rep(1/2,n))

        rq.fit.sfn(D,y,rhs=a)

        }

 

R统计软件操作程序:

library(“quantreg”)

source(“rq.fit.panel”)

m <- 12

n <- 8

s <- rep(1:n,rep(m,n))

data<-read.csv("zhongbu.csv")

x<-data$LNVEDU

X<-cbind(1,x)

y<-data$LNVGDP

fit<- rq.fit.panel(X,y,s)

fit

 

n1=length(y)                  

b=2000                        #Number of bootstrap samples

ncoef=5*ncol(X)+n              

boot_bhat=matrix(NA,b, ncoef)

block_length =8               #代表块状长度

num_blocks = n1/block_length    #n/block_length 

Indices = seq(1:n1)             # All of the indices from 1 to n

Indices = matrix(Indices,block_length,num_blocks)

for (i in 1:b){                 #Number of bootstrap samples

randblock =sample(seq(1:num_blocks),num_blocks,replace = TRUE) # Choose which blocks to use

Ind_sim = Indices[,randblock]       #Find which data are in each block 

Ind_sim = c(Ind_sim)      

Xsim = X[Ind_sim,1:dim(X)[2]]       #Construct the x data

Ysim = y[Ind_sim]                   #Construct the y data

boot_bhat[i,] = rq.fit.panel(Xsim,Ysim,s)$coef

}

bhat=colMeans(boot_bhat)

cov=cov(boot_bhat)

serr = sqrt(diag(cov))  

 

p <- length(bhat)

rdf <- n1 - p                      

vnames<- dimnames(x)[[2]]

coef <- array(bhat, c(p, 4))

dimnames(coef) <- list(vnames, c("Value", "Std. Error", "t value","Pr(>|t|)"))

coef[, 2] <- serr

coef[, 3] <- coef[, 1]/coef[, 2]

coef[, 4] <- if (rdf > 0) 2 * (1 - pt(abs(coef[, 3]), rdf))

coef









作者: epoh    时间: 2011-4-3 16:56:23

Grunfeld   Grunfeld Investment Data
  a panel of 10 observations from 1935 to 1954
  number of observations : 200
##########
library(SparseM)
library(quantreg)
library(Ecdat)
data(Grunfeld)
#panel data Grunfeld
#n=10, T=20, N=200
x1=Grunfeld$value
x2=Grunfeld$capital
X=cbind(x1,x2)
y=Grunfeld$inv
s= rep(1:10,rep(20,10))
w=1
n <- length(levels(as.factor(s)))
Z <- as.matrix.csr(model.matrix(~as.factor(s)-1))
xx<- cbind(as(w,"matrix.diag.csr") %x% X,w %x% Z)
####################
tt=as.matrix(xx)
taus=seq(0.1,0.9,0.01)
ntau=length(taus)  #81
coef1mat=matrix(NA,ntau,5)
coef2mat=matrix(NA,ntau,5)
rq.fit=rq(y~tt-1,taus)
sum=summary(rq.fit,"ker")
#If n is large(>1000), t=1.96
t=1.96
for (i in 1:ntau){
#coefficient1
coef1mat[i,1]=sum[]$tau
coef1mat[i,2]=sum[
]$coefficients[1,1]
coef1mat[i,3]=sum[]$coefficients[1,2]
coef1mat[i,4]=sum[
]$coefficients[1,1]+t*sum[]$coefficients[1,2]
coef1mat[i,5]=sum[
]$coefficients[1,1]-t*sum[]$coefficients[1,2]
#coefficient2
coef2mat[i,1]=sum[
]$tau
coef2mat[i,2]=sum[]$coefficients[2,1]
coef2mat[i,3]=sum[
]$coefficients[2,2]
coef2mat[i,4]=sum[]$coefficients[2,1]+t*sum[]$coefficients[2,2]
coef2mat[i,5]=sum[]$coefficients[2,1]-t*sum[]$coefficients[2,2]
}
coef1mat
coef2mat
###plot coef1,coef2 95%CI
par(mfrow = c(1, 2))
####coefficient1
upperbd1=coef1mat[,4]
lowerbd1=coef1mat[,5]
coef1=coef1mat[,2]
plot(taus,upperbd1,type = 'n',  ylim = c(0.00, 0.20),
     ylab = 'Coef1 Value', xlab = 'Quantile')
lines(taus, lowerbd1, col = 'grey')
lines(taus, upperbd1, col = 'grey')
polygon(c(taus, rev(taus)), c(upperbd1, rev(lowerbd1)),
     col = "lightblue", border = 'black')
points(taus, coef1,col='blue',pch=21)
lines(taus, coef1,col='blue')
title("gross Investment vs value of the firm") 
####coefficient2
upperbd2=coef2mat[,4]
lowerbd2=coef2mat[,5]
coef2=coef2mat[,2]
plot(taus,upperbd2,type = 'n',  ylim = c(0.00, 0.50),
     ylab = 'Coef2 Capital', xlab = 'Quantile')
lines(taus, lowerbd2, col = 'grey')
lines(taus, upperbd2, col = 'grey')
polygon(c(taus, rev(taus)), c(upperbd2, rev(lowerbd2)),
     col = "lightblue", border = 'black')
points(taus, coef2,col='blue',pch=21)
lines(taus, coef2,col='blue')
title("gross Investment vs stock of plant and equipment") 

0

阅读 收藏 喜欢 打印举报/Report
  

新浪BLOG意见反馈留言板 欢迎批评指正

新浪简介 | About Sina | 广告服务 | 联系我们 | 招聘信息 | 网站律师 | SINA English | 产品答疑

新浪公司 版权所有