The goal of this assignment is to explore the NOAA Storm Database and assess the effects of severe weather events on both population and economy. The database covers the time period between 1950 and November 2011.
The following questions are addressed:
- Across the United States, which types of events are most harmful with respect to population health?
- Across the United States, which types of events have the greatest economic consequences?
To reduce the memory footprint, variables are overwritten with their newly calculated values instead of being copied into new variables.
Load data
# Download the compressed storm data once; later runs reuse the local copy.
storm_archive <- "repdata-data-StormData.csv.bz2"
if (!file.exists(storm_archive)) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    storm_archive,
    # The archive is binary: request a binary transfer explicitly so it is
    # never written in text mode (which would corrupt it on Windows).
    mode = "wb"
  )
}
# Reading the archive is skipped when `storm` already exists in the session.
if (!exists("storm")) {
  # bzfile() decompresses on the fly; the result is converted to a data.table.
  storm <- as.data.table(read.csv(bzfile(storm_archive)))
}
Handle the undocumented exponent values of PROPDMGEXP and CROPDMGEXP according to this document. First, all characters in PROPDMGEXP and CROPDMGEXP are converted to uppercase. This chunk is very slow due to the updating of all columns by reference.
# Replace the exponent codes in PROPDMGEXP and CROPDMGEXP with numeric
# multipliers. Codes are upper-cased before the lookup, and every code without
# an entry in `dmgExp` (including the empty string and NA) becomes 0.
cols <- c("PROPDMGEXP", "CROPDMGEXP")
# Lookup table: exponent code -> multiplier. The empty code needs no entry; it
# has no key here and therefore falls through to the 0 default below.
dmgExp <- c(
  "-" = 0, "?" = 0,
  "+" = 1,
  "0" = 10, "1" = 10, "2" = 10, "3" = 10, "4" = 10,
  "5" = 10, "6" = 10, "7" = 10, "8" = 10,
  "H" = 100,
  "K" = 1000,
  "M" = 1000000,
  "B" = 1000000000
)
# Convert one column of exponent codes into its numeric multipliers.
exp_to_multiplier <- function(code) {
  # unname() keeps only the numbers, not the lookup keys.
  multiplier <- unname(dmgExp[toupper(as.character(code))])
  # Codes missing from the table yield NA from the lookup; treat them as 0.
  multiplier[is.na(multiplier)] <- 0
  multiplier
}
# Single pass over both columns, updating `storm` by reference.
storm[, (cols) := lapply(.SD, exp_to_multiplier), .SDcols = cols]
Sum crop and property damages into a single DAMAGES column
# DAMAGES = crop damage plus property damage, each figure multiplied by the
# value in its matching *DMGEXP column.
storm$DAMAGES <- with(
  storm,
  CROPDMG * CROPDMGEXP + PROPDMG * PROPDMGEXP
)
Subset the dataset to include only the needed data
# Keep only the event type and the three outcome columns.
needed_cols <- c("EVTYPE", "FATALITIES", "INJURIES", "DAMAGES")
storm <- storm[, needed_cols, with = FALSE]
Aggregate event names
# Merge selected event names into a common label. Every pattern is anchored at
# both ends, so only values that equal the old name exactly are rewritten.
event_aliases <- c(
  "^HEAT$" = "EXCESSIVE HEAT",
  "^(TSTM|THUNDERSTORM) WIND$" = "STRONG WIND"
)
for (pattern in names(event_aliases)) {
  storm$EVTYPE <- sub(pattern, event_aliases[[pattern]], storm$EVTYPE)
}
Aggregate data on fatalities
# Total fatalities per event type (NAs ignored), largest total first.
fatalities <- aggregate(
  x = list(FATALITIES = storm[["FATALITIES"]]),
  by = list(EVTYPE = storm[["EVTYPE"]]),
  FUN = sum,
  na.rm = TRUE
)
fatalities <- fatalities[order(-fatalities[["FATALITIES"]]), ]
# Ordered factor whose levels follow the sorted row order.
fatalities[["EVTYPE"]] <- ordered(
  fatalities[["EVTYPE"]],
  levels = fatalities[["EVTYPE"]]
)
Aggregate data on injuries
# Total injuries per event type (NAs ignored), largest total first.
injuries <- aggregate(
  x = list(INJURIES = storm[["INJURIES"]]),
  by = list(EVTYPE = storm[["EVTYPE"]]),
  FUN = sum,
  na.rm = TRUE
)
injuries <- injuries[order(-injuries[["INJURIES"]]), ]
# Ordered factor whose levels follow the sorted row order.
injuries[["EVTYPE"]] <- ordered(
  injuries[["EVTYPE"]],
  levels = injuries[["EVTYPE"]]
)
Aggregate data on damages
# Total damages per event type (NAs ignored), largest total first.
damages <- aggregate(
  x = list(DAMAGES = storm[["DAMAGES"]]),
  by = list(EVTYPE = storm[["EVTYPE"]]),
  FUN = sum,
  na.rm = TRUE
)
damages <- damages[order(-damages[["DAMAGES"]]), ]
# Ordered factor whose levels follow the sorted row order.
damages[["EVTYPE"]] <- ordered(
  damages[["EVTYPE"]],
  levels = damages[["EVTYPE"]]
)
Plot the 10 event types that caused the most fatalities
# Bar chart of the first 10 rows of `fatalities` (sorted by descending total).
# head() is used instead of [1:10, ] so a table with fewer than 10 rows does
# not produce all-NA rows.
ggplot(data = head(fatalities, 10), aes(x = EVTYPE, y = FATALITIES)) +
  geom_bar(aes(fill = FATALITIES), stat = "identity") +
  xlab("Event Type") +
  ylab("Total Fatalities") +
  ggtitle("Fatalities by Event Type") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, lineheight = 0.8, face = "bold")
  ) +
  # "none" hides the fill legend; passing FALSE here is deprecated in ggplot2.
  guides(fill = "none")
Plot the 10 event types that caused the most injuries
# Bar chart of the first 10 rows of `injuries` (sorted by descending total).
# head() is used instead of [1:10, ] so a table with fewer than 10 rows does
# not produce all-NA rows.
ggplot(data = head(injuries, 10), aes(x = EVTYPE, y = INJURIES)) +
  geom_bar(aes(fill = INJURIES), stat = "identity") +
  xlab("Event Type") +
  ylab("Total Injuries") +
  ggtitle("Injuries by Event Type") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, lineheight = 0.8, face = "bold")
  ) +
  # "none" hides the fill legend; passing FALSE here is deprecated in ggplot2.
  guides(fill = "none")
Plot the 10 event types that caused the greatest combined property and crop damage
# Bar chart of the first 10 rows of `damages` (sorted by descending total).
# head() is used instead of [1:10, ] so a table with fewer than 10 rows does
# not produce all-NA rows.
ggplot(data = head(damages, 10), aes(x = EVTYPE, y = DAMAGES)) +
  geom_bar(aes(fill = DAMAGES), stat = "identity") +
  xlab("Event Type") +
  ylab("Total Damage in USD") +
  ggtitle("Combined Property and Crop Damages by Event Type") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, lineheight = 0.8, face = "bold")
  ) +
  # "none" hides the fill legend; passing FALSE here is deprecated in ggplot2.
  guides(fill = "none")