library(dplyr)
library(stringr)
library(tidyr)
input <- readLines("input4.txt") %>% paste(collapse = "\n") %>% str_split("\n\n") %>% unlist() %>% str_replace_all("\n", " ")
flds <- str_match(input, "([a-z]+):")[,2] %>% unique()
df <- data.frame(Orig = input)
for (f in flds) {
    df[[f]] <- str_match(input, paste0(f, ":([a-zA-Z0-9#]+)"))[,2]
}
df.valid <- df %>% drop_na(-cid)
str(df)
str(df.valid)
nrow(df.valid)

ecs <- c("amb", "blu", "brn", "gry", "grn", "hzl", "oth")
df.valid %>% mutate(
        byr = ifelse(between(as.numeric(str_match(byr, "^([0-9]+)$")[,2]), 1920, 2002), byr, NA),
        iyr = ifelse(between(as.numeric(str_match(iyr, "^([0-9]+)$")[,2]), 2010, 2020), iyr, NA),
        eyr = ifelse(between(as.numeric(str_match(eyr, "^([0-9]+)$")[,2]), 2020, 2030), eyr, NA),
        hgt = ifelse(between(as.numeric(str_match(hgt, "^([0-9]+)cm$")[,2]), 150, 193) | between(as.numeric(str_match(hgt, "^([0-9]+)in$")[,2]), 59, 76), hgt, NA),
        hcl = str_extract(hcl, "^[#]{1}[0-9a-f]{6}$"),
        ecl = ifelse(ecl %in% ecs, ecl, NA),
        pid = str_extract(pid, "^[0-9]{9}$")
    ) %>% drop_na(-cid) -> dv2
nrow(dv2)