thr3ads.net - R help - [R] R Help [Mar 2015]

If this information is useful, please help other people find it:
Share via:

Rami Alzebdieh

2015-Mar-02 09:38 UTC

[R] R Help

Dear Sir,

I started using R three months ago and I am still learning. I have a project
in which I am using R; a friend helped me write the code for it and it works
perfectly, but I need to make a small change to it. The change looks very
simple, but for me it is very complicated. I have included the code below and
I hope you can help me with this problem. I have marked exactly what I need to
change. This project calculates the market and industry weighted returns for
each company, based on the date levels.

# Load the raw export for one country and keep only the rows and columns
# used by the rest of the script.
id.country <- 14
# header = TRUE rather than T: T is an ordinary variable and can be reassigned.
sync <- read.csv("country-14.csv", header = TRUE)

# Keep the rows of the selected country. Rows whose `country` field is the
# literal text "country" are excluded explicitly (presumably repeated header
# lines inside the export -- confirm against the file). Columns 2 and 5 are
# dropped by position.
keep.rows <- sync$country != "country" & sync$country == id.country
sync <- sync[keep.rows, -c(2, 5)]

# Convert via as.character() so that, if a column was read as a factor, its
# labels are converted rather than its internal integer codes.
sync$price <- as.numeric(as.character(sync$price))
sync$mv <- as.numeric(as.character(sync$mv))

# NOTE(review): the following sections read columns through attach(). The
# attached columns are copies taken at attach time and are masked by any
# workspace variable of the same name.
attach(sync)

#### Calculate returns and add to the dataset
# Simple return per company, (price[t] - price[t-1]) / price[t-1], taken in
# the row order in which each company's rows appear in `sync`.
comp.names <- levels(as.factor(as.character(sync$company_name)))
n.comp <- length(comp.names)
data <- lapply(comp.names, function(comp) {
  temp <- sync[sync$company_name == comp, ]
  # The first row of a company has no previous price, hence the leading NA.
  # head(x, -1) drops the last price and, unlike x[1:(length(x) - 1)], stays
  # empty when the company has a single row.
  # The column is named here instead of renaming position 7 afterwards, so
  # the result does not depend on how many columns the input file has.
  cbind(temp, returns = c(NA, diff(temp$price) / head(temp$price, -1)))
})
sync <- do.call(rbind, data)
# Refresh the attached copy so later sections see the new column.
detach(sync)
attach(sync)

#### Fill industry_code column
# The industry code is the leading part of company_code: the first character
# when the code is 3 characters long, otherwise the first two characters.
# substr() accepts a vector of end positions, so the whole column is handled
# in one call instead of one row (and one progress print) at a time.
code.chr <- as.character(sync$company_code)
code.width <- ifelse(nchar(code.chr) == 3, 1, 2)
# `industry_code` is kept as a numeric workspace vector, as before; the data
# frame receives the same values as a factor column.
industry_code <- as.numeric(substr(code.chr, 1, code.width))
sync$industry_code <- as.factor(industry_code)
# Refresh the attached copy so later sections see the new column.
detach(sync)
attach(sync)

#### Calculate market weighted returns and add to the dataset
# For every (date, company) pair, market_returns is the market-value-weighted
# return of all rows on that date, minus the company's own weighted
# contribution, divided by (number of companies on that date - 1).
market_returns <- rep(NA, nrow(sync))
industry_returns <- rep(NA, nrow(sync))  # filled by the industry section

# as.factor() leaves an existing factor untouched and converts a character
# column, so the loop does not depend on read.csv() having produced factors
# (nlevels() of a character vector is 0, which made 1:nlevels(date) run over
# c(1, 0)).
date.levels <- levels(as.factor(sync$date))
n.dates <- length(date.levels)
for (i in seq_len(n.dates)) {
  on.date <- sync$date == date.levels[i]
  data <- sync[on.date, ]
  data$company_name <- as.factor(as.character(data$company_name))

  # These values are the same for every company on the date, so they are
  # computed once per date rather than once per company.
  weighted <- data$returns * (data$mv / sum(data$mv, na.rm = TRUE))
  total <- sum(weighted, na.rm = TRUE)

  # Author's note: this is what I need to change. Instead of the number of
  # company levels in the dataset, nlevels(data$company_name), I need the
  # number of returns values (data$returns) without NA, i.e.
  # sum(!is.na(data$returns)). (This code calculates returns at the date
  # level, as can be seen above.)
  n.others <- nlevels(data$company_name) - 1

  for (comp in levels(data$company_name)) {
    index1 <- data$company_name == comp           # rows within `data`
    index2 <- on.date & sync$company_name == comp  # same rows within `sync`
    market_returns[index2] <- (total - weighted[index1]) / n.others
  }
  print(i / n.dates)  # progress as a fraction of dates processed
}

sync$market_returns <- market_returns
# Refresh the attached copy so later sections see the new column.
detach(sync)
attach(sync)

#### Calculate industry weighted returns and add to the dataset
# Same calculation as the market section, but the weighted total and the
# company count are taken within one (date, industry) cell at a time.
# `industry_returns` must already exist with one entry per row of `sync`.

# as.factor() leaves an existing factor untouched and converts a character
# column, so the loop does not depend on read.csv() having produced factors.
date.levels <- levels(as.factor(sync$date))
n.dates <- length(date.levels)
# The industry levels do not change inside the loops, so compute them once.
industry.chr <- as.character(sync$industry_code)
industry.levels <- levels(as.factor(industry.chr))

for (i in seq_len(n.dates)) {
  on.date <- sync$date == date.levels[i]
  for (ind in industry.levels) {
    data1 <- sync[on.date & industry.chr == ind, ]
    data1$company_name <- as.factor(as.character(data1$company_name))

    weighted <- data1$returns * (data1$mv / sum(data1$mv, na.rm = TRUE))
    total <- sum(weighted, na.rm = TRUE)

    # Author's note: also here I need to change. Instead of the number of
    # company levels in the dataset, nlevels(data1$company_name), I need the
    # number of returns values (data1$returns) without NA, i.e.
    # sum(!is.na(data1$returns)). (This code calculates returns at the date
    # level and industry level, as can be seen above.)
    n.others <- nlevels(data1$company_name) - 1

    # Iterating over the levels themselves does nothing for an empty cell;
    # 1:nlevels(...) would have run over c(1, 0) in that case.
    for (comp in levels(data1$company_name)) {
      index3 <- data1$company_name == comp          # rows within `data1`
      index4 <- on.date & sync$company_name == comp  # rows within `sync`
      industry_returns[index4] <- (total - weighted[index3]) / n.others
    }
  }
  print(i / n.dates)  # progress as a fraction of dates processed
}

sync$industry_returns <- industry_returns
# Refresh the attached copy so later sections see the new column.
detach(sync)
attach(sync)

# Year = characters 7 to 10 of the date text (this assumes a layout such as
# dd/mm/yyyy -- TODO confirm against the file). substr() is vectorised, so
# no row-wise apply() is needed.
sync$year <- as.factor(substr(as.character(sync$date), 7, 10))

# Drop the 1999 rows, then rebuild the factor so "1999" is no longer a level.
sync <- sync[sync$year != "1999", ]
sync$year <- as.factor(as.character(sync$year))

# Keep the workspace variable `year` aligned with the filtered rows; a
# workspace variable takes precedence over the attached column of the same
# name.
year <- sync$year
detach(sync)
attach(sync)

# Number of distinct companies in each (year, industry) cell, repeated on
# every row that belongs to the cell.
#
# The columns are read from `sync` explicitly. Workspace variables named
# `year` and `industry_code` are created earlier in the script; they take
# precedence over the attached columns and were built before the 1999 rows
# were removed, so using them here can index the wrong rows.
year <- as.factor(as.character(sync$year))
industry_code <- as.factor(as.character(sync$industry_code))
comp.per.ind <- rep(NA, nrow(sync))
for (yr in levels(year)) {
  for (ind in levels(industry_code)) {
    index <- year == yr & industry_code == ind
    data <- sync[index, ]
    # Re-factoring the names drops unused levels, so nlevels() counts only
    # the companies actually present in this cell.
    comp.per.ind[index] <- nlevels(as.factor(as.character(data$company_name)))
  }
}

# Assign by name rather than renaming column 12 by position.
sync$comp.per.ind <- as.factor(comp.per.ind)
detach(sync)
attach(sync)

# Write the augmented data set to "Returns_data<id.country>.csv" in the
# current working directory.
out.file <- paste0("Returns_data", id.country, ".csv")
write.csv(sync, out.file)




Thank you for your help

Rami Alzebdieh



	[[alternative HTML version deleted]]

Uwe Ligges

2015-Mar-02 23:03 UTC

head link

[R] R Help

Better ask for local help if you can't reduce your code to some minimal 
examples so that we can understand easily what you are looking for.


On 02.03.2015 10:38, Rami Alzebdieh wrote:
> Dear Sir,
>
> I start using (R) 3 months ago,  and I am still learning,
Same for me .... after more than 16 years.

Best,
Uwe Ligges




> I have a project and I am using R in this project, my friend helped
> me to build a code for this project and it's working perfect, but I need
> to make a small change in, it looks very simple but for me it's very
> complicated. I insert the code and I hope if you can help me this
> problem. I highlighted what exactly I need to change. This project is
> calculating the market and industry weighted returns for each based on
> the date levels.
>
> sync = read.csv("country-14.csv",header=T)
> id.country = 14
>
> sync = sync[sync$country!="country" &
sync$country==id.country,-c(2,5)]
> sync$price=as.numeric(as.character(sync$price))
> sync$mv=as.numeric(as.character(sync$mv))
> attach(sync)
>
> #### Calculate returns and add to the dataset
> n.comp = nlevels(as.factor(as.character(sync$company_name)))
> comp.names = levels(as.factor(as.character(sync$company_name)))
> data = vector("list",n.comp)
> for(i in 1:n.comp){
>    temp = sync[sync$company_name==comp.names[i],]
>    data[[i]] =
cbind(temp,c(NA,diff(temp$price)/temp$price[1:(length(temp$price)-1)]))
> }
> sync = do.call(rbind,data)
> names(sync)[7] = "returns"
> detach(sync)
> attach(sync)
>
> #### Fill industry_code column
> industry_code=rep(NA,dim(sync)[1])
> for(i in 1:dim(sync)[1]){
>    if(nchar(as.character(company_code[i])) == 3){
>      industry_code[i] =
as.numeric(substr(as.character(company_code[i]),1,1))
>    } else {
>      industry_code[i] =
as.numeric(substr(as.character(company_code[i]),1,2))
>    }
>    print((i/dim(sync)[1])*100)
> }
> sync = cbind(sync,as.factor(industry_code))
> names(sync)[8] = "industry_code"
> detach(sync)
> attach(sync)
>
> #### Calculate market weighted returns and add to the dataset
> market_returns = rep(NA,dim(sync)[1])
> industry_returns = rep(NA,dim(sync)[1])
> for(i in 1:nlevels(date)){
>      data = sync[date==levels(date)[i],]
>      data$company_name = as.factor(as.character(data$company_name))
>      for(m in 1:nlevels(data$company_name)){
>        index1 = data$company_name == levels(data$company_name)[m]
>        index2 = date==levels(date)[i] &
company_name==levels(data$company_name)[m]
>        market_returns[index2] =
(sum(data$returns*(data$mv/sum(data$mv,na.rm=TRUE)),na.rm=TRUE) -
>         
(data$returns[index1]*(data$mv[index1]/sum(data$mv,na.rm=TRUE))))/(nlevels(data$company_name)-1)
## this what I need to change, instead of using the number of levels companies
in the dataset (nlevels(data$company_name) , I need to put the number of returns
values(data$returns) without NA (by the way this code is calculating returns at
the date level as you can see from above)
>      }
>    print(i/nlevels(date))
> }
>
> sync = cbind(sync,market_returns)
> names(sync)[9] = "market_returns"
> detach(sync)
> attach(sync)
>
> #### Calculate industry weighted returns and add to the dataset
> for(i in 1:nlevels(date)){
>      for(k in 1:nlevels(as.factor(as.character(industry_code)))){
>        data1 = sync[date==levels(date)[i] &
industry_code==levels(as.factor(as.character(industry_code)))[k],]
>        data1$company_name = as.factor(as.character(data1$company_name))
>        for(l in 1:nlevels(data1$company_name)){
>          index3 = data1$company_name == levels(data1$company_name)[l]
>          index4 = date==levels(date)[i] &
company_name==levels(data1$company_name)[l]
>          industry_returns[index4] =
(sum(data1$returns*(data1$mv/sum(data1$mv,na.rm=TRUE)),na.rm=TRUE) -
>           
(data1$returns[index3]*(data1$mv[index3]/sum(data1$mv,na.rm=TRUE))))/(nlevels(data1$company_name)-1)
## also here I need to change, instead of using the number of levels companies
in the dataset (nlevels(data1$company_name) , I need to put the number of
returns values(data1$returns) without NA (by the way this code is calculating
returns at the date level and industry level as you can see from above)
>
>        }
>      }
>    print(i/nlevels(date))
> }
>
> sync = cbind(sync,industry_returns)
> names(sync)[10] = "industry_returns"
> detach(sync)
> attach(sync)
>
> year = apply(as.matrix(sync$date),1,function(x)
as.factor(substr(as.character(x),7,10)))
> sync = cbind(sync,as.factor(year))
> names(sync)[11] = "year"
> sync = sync[sync$year!="1999",]
> sync$year = as.factor(as.character(sync$year))
> detach(sync)
> attach(sync)
>
> year = as.factor(as.character(year))
> industry_code = as.factor(as.character(industry_code))
> comp.per.ind = rep(NA, dim(sync)[1])
> for(i in 1:nlevels(year)){
>    for(j in 1:nlevels(industry_code)){
>      index = year==levels(year)[i] &
industry_code==levels(industry_code)[j]
>      data = sync[index,]
>      comp.per.ind[index] =
nlevels(as.factor(as.character(data$company_name)))
>    }
> }
>
> sync = cbind(sync,as.factor(comp.per.ind))
> names(sync)[12] = "comp.per.ind"
> detach(sync)
> attach(sync)
>
>
write.csv(sync,paste("Returns_data",id.country,".csv",sep=""))
>
>
>
>
> Thank you for your help
>
> Rami Alzebdieh
>
>
>
> 	[[alternative HTML version deleted]]
>
> ______________________________________________
> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide
http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>

R help - Mar 2015 - R Help

[R] R Help

[R] R Help