### Combine all data together ### 07.06.19: Removes noisy tickers if you want it to if(!is.null(dev.list())) dev.off() cat("\014") rm(list=ls()) setwd("C:/Users/A1310360/Dropbox/Dokumenter/Skole/BI/Master Thesis/Excel") #setwd("/Users/emilbreivikasskei/Dropbox/Dokumenter/Skole/BI/Master\ Thesis/Excel/") library(dplyr) library(readxl) # Import all the data abnormal_returns <- read.csv("Abnormal_returns_test.csv") abnormal_turnover <- read.csv("Abnormal_turnover.csv") ASVI <- read.csv("ASVI.csv") log_market_cap <- read.csv("Log_market_cap.csv") market_cap_ASVI <- read.csv("Log_market_cap_ASVI.csv") advsales <- read.csv("Advsales.csv") log_num_analysts <- read.csv("Log_Num_Analysts.csv") market_cap <- read.csv("Market_cap.csv") num_analysts <- read.csv("Num_Analysts_desc.csv") # Save date and year column dates <- as.Date(abnormal_returns$X.1) years <- as.numeric(advsales$Year) years <- years[-1] # Delete the first column for each data frame abnormal_returns <- abnormal_returns[,-1] abnormal_turnover <- abnormal_turnover[,-1] ASVI <- ASVI[,-1] log_market_cap <- log_market_cap[,-1] market_cap_ASVI <- market_cap_ASVI[,-1] advsales <- advsales[,-1] # index advsales <- advsales[,-1] # years log_num_analysts <- log_num_analysts[,-1] # index log_num_analysts <- log_num_analysts[,-1] # years market_cap <- market_cap[,-1] num_analysts <- num_analysts[,-1] # index num_analysts <- num_analysts[,-1] # years # Delete the 2018 data for advsales advsales <- advsales[-6,] ### Order data alphabetically abnormal_returns <- abnormal_returns[, order(colnames(abnormal_returns))] abnormal_turnover <- abnormal_turnover[, order(colnames(abnormal_turnover))] ASVI <- ASVI[, order(colnames(ASVI))] log_market_cap <- log_market_cap[, order(colnames(log_market_cap))] market_cap_ASVI <- market_cap_ASVI[, order(colnames(market_cap_ASVI))] advsales <- advsales[, order(colnames(advsales))] log_num_analysts <- log_num_analysts[, order(colnames(log_num_analysts))] market_cap <- market_cap[, order(colnames(market_cap))] num_analysts <- num_analysts[, order(colnames(num_analysts))] # Remove noisy tickers #noisy <- read_excel("NoisyTickers.xlsx", sheet = 1, col_names = TRUE) #noisy <- noisy[,-(2:3)] #noisy <- noisy[order(noisy$Tickers),] #noisy_tickers <- subset(noisy, noisy$Remove == 1) #noisy_tickers <- as.character(noisy_tickers$Tickers) # Remove companies with only NA data rem_tick <- c("BXLT", "CPGX", "JEF", "EVRG", "BKNG", "CBRE") #rem_tick <- c(rem_tick, noisy_tickers) tickers <- as.character(unique(colnames(abnormal_returns))) index <- as.numeric(rep(0, length(rem_tick))) for (i in 1:length(rem_tick)) { index[i] <- as.numeric(which(tickers == rem_tick[i])) } abnormal_returns <- abnormal_returns[,-index] abnormal_turnover <- abnormal_turnover[,-index] ASVI <- ASVI[,-index] log_market_cap <- log_market_cap[,-index] market_cap_ASVI <- market_cap_ASVI[,-index] advsales <- advsales[,-index] log_num_analysts <- log_num_analysts[,-index] market_cap <- market_cap[,-index] num_analysts <- num_analysts[,-index] # Update tickers vector tickers <- as.character(colnames(abnormal_returns)) # Need to add the date vector to each data frame (and year vector to advsales) abnormal_returns <- data.frame(dates, abnormal_returns) advsales <- data.frame(years, advsales) # Create Year-column for one dataset, in order to match compustat data with the rest: Year <- as.numeric(format(date,'%Y')) abnormal_returns$years <- as.numeric(format(abnormal_returns$dates, "%Y")) # Combine abnormal_returns with advsales using the years column advsales_new <- data.frame(rep(0, 261)) years_long <- data.frame(abnormal_returns$years) colnames(years_long) <- "years" # with noisy tickers: 1:613 # without: 1:516 for (i in 1:ncol(ASVI)) { temp <- data.frame(advsales[,1], advsales[,i+1]) colnames(temp) <- c("years", "data") temp2 <- left_join(years_long, temp, by = "years") advsales_new[,i] <- temp2$data } colnames(advsales_new) <- tickers # Delete date and year column abnormal_returns <- abnormal_returns[,-1] abnormal_returns <- abnormal_returns[,-614] #with noisy tickers: 614. Without: 517 # Create absolute abnormal returns abs_abnormal_returns <- abs(abnormal_returns) ### Create lag_ASVI lag_ASVI <- data.frame(rep(0,261)) for (i in 1:length(ASVI)) {lag_ASVI[,i] <- lag(ASVI[,i])} ### Create lag_log_market_cap_ASVI lag_log_market_cap_ASVI <- data.frame(rep(0,261)) for (i in 1:length(market_cap_ASVI)) {lag_log_market_cap_ASVI[,i] <- lag(market_cap_ASVI[,i])} ### Create lag_log_market_cap lag_log_market_cap <- data.frame(rep(0,261)) for (i in 1:length(log_market_cap)) {lag_log_market_cap[,i] <- lag(log_market_cap[,i])} ### Create lag_absolute abnormal return lag_abs_abnormal_returns <- data.frame(rep(0,261)) for (i in 1:length(abs_abnormal_returns)) {lag_abs_abnormal_returns[,i] <- lag(abs_abnormal_returns[,i])} ### Create lag_advsales lag_advsales <- data.frame(rep(0,261)) for (i in 1:length(advsales_new)) {lag_advsales[,i] <- lag(advsales_new[,i])} ### Create lag_log_num_analysts lag_log_num_analysts <- data.frame(rep(0,261)) for (i in 1:length(log_num_analysts)) {lag_log_num_analysts[,i] <- lag(log_num_analysts[,i])} ### Create lag_abnormal_turnover lag_abnormal_turnover <- data.frame(rep(0,261)) for (i in 1:length(abnormal_turnover)) {lag_abnormal_turnover[,i] <- lag(abnormal_turnover[,i])} ### # create one data frame before the for-loop, then rbind() the frames for each for-loop col_names <- c("dates", "ticker", "abnormal_returns", "abs_abnormal_returns", "abnormal_turnover", "ASVI", "log_market_cap", "log_market_cap_ASVI", "advsales", "log_num_analysts", "market_cap", "num_analysts", "lag_ASVI", "lag_log_market_cap_ASVI", "lag_log_market_cap", "lag_abs_abnormal_returns", "lag_advsales", "lag_log_num_analysts", "lag_abnormal_turnover") master_data <- data.frame( dates, tickers[1], abnormal_returns[,1], abs_abnormal_returns[,1], abnormal_turnover[,1], ASVI[,1], log_market_cap[,1], market_cap_ASVI[,1], advsales_new[,1], log_num_analysts[,1], market_cap[,1], num_analysts[,1], lag_ASVI[,1], lag_log_market_cap_ASVI[,1], lag_log_market_cap[,1], lag_abs_abnormal_returns[,1], lag_advsales[,1], lag_log_num_analysts[,1], lag_abnormal_turnover[,1]) colnames(master_data) <- col_names # with noisy tickers: 612 # without: 515 for (i in 1:612) { temp3 <- data.frame( dates, tickers[i+1], abnormal_returns[,i+1], abs_abnormal_returns[,i+1], abnormal_turnover[,i+1], ASVI[,i+1], log_market_cap[,i+1], market_cap_ASVI[,i+1], advsales_new[,i+1], log_num_analysts[,i+1], market_cap[,i+1], num_analysts[,i+1], lag_ASVI[,i+1], lag_log_market_cap_ASVI[,i+1], lag_log_market_cap[,i+1], lag_abs_abnormal_returns[,i+1], lag_advsales[,i+1], lag_log_num_analysts[,i+1], lag_abnormal_turnover[,i+1]) colnames(temp3) <- col_names master_data <- rbind(master_data, temp3) } #write.csv(master_data, file = "Master_data_test.csv")