2021. 04. 01

PMJ
1 min read · Apr 1, 2021

##DATA

# Session setup: working directory and the packages the script relies on.
# NOTE(review): the working directory is machine-specific; adjust the path
# before running on another machine.
setwd('D:/work')
library(data.table)
# The pipelines below call dplyr verbs and stringr helpers, so both packages
# have to be attached in addition to data.table.
library(dplyr)    # %>%, filter(), group_by(), summarise(), mutate(), left_join()
library(stringr)  # str_remove_all(), str_replace()
#install.packages("bit64")
# Earliest INITIAL_DATE within each DATA_TYPE group of RUSH_FULL.
# NA dates are ignored when taking the minimum.
RUSH_FULL_sub <- RUSH_FULL %>%
  group_by(DATA_TYPE) %>%
  summarise(
    MIN_INITIAL = min(INITIAL_DATE, na.rm = TRUE)
  )

# Median INITIAL_DATE (converted with as.Date()) for each GRADE.
# Rows whose GRADE is the empty string or NA are dropped first, so those
# grades never appear as groups in the result.
RUSH_FULL_sub2 <- RUSH_FULL %>%
  filter(!((GRADE == "") | is.na(GRADE))) %>%
  group_by(GRADE) %>%
  summarise(
    MEDIAN_INITIAL = median(as.Date(INITIAL_DATE), na.rm = TRUE)
  )

# Frequency table of the values in RUSH_FULL$INITIAL_DATE.
RUSH_FULL$INITIAL_DATE %>% table()

# "", NA -> how many of each there are

# 1 - pipe version
# Take the rows that have no MEDIAN_INITIAL after joining RUSH_FULL_sub2 onto
# RUSH_FULL, label each one "A" when its GRADE is NA and "B" otherwise, and
# report the number of rows (N) per label.
left_join(RUSH_FULL, RUSH_FULL_sub2) %>%
  filter(is.na(MEDIAN_INITIAL)) %>%
  transmute(RESULT = ifelse(is.na(GRADE), "A", "B")) %>%
  count(RESULT, name = "N")
# 2 - the same steps written without the pipe
# subset() keeps the joined rows that have no MEDIAN_INITIAL and only their
# GRADE column; mutate() then labels each row "A" (GRADE is NA) or "B".
RUSH_FULL_NEWCOL <- mutate(
  subset(
    left_join(RUSH_FULL, RUSH_FULL_sub2),
    subset = is.na(MEDIAN_INITIAL),
    # `select` is an argument of subset(), not of mutate().
    select = GRADE
  ),
  RESULT = ifelse(is.na(GRADE), "A", "B")
)

# Row count per RESULT label (the non-pipe form of group_by() + summarise()).
summarise(group_by(RUSH_FULL_NEWCOL, RESULT), N = n())

# Number of distinct labels, and a cross-check of the per-label counts.
n_distinct(RUSH_FULL_NEWCOL$RESULT)
table(RUSH_FULL_NEWCOL$RESULT)
## Number of NA values
# Counts the rows whose GRADE is NA in the join of RUSH_FULL with
# RUSH_FULL_sub2.
with(
  left_join(RUSH_FULL, RUSH_FULL_sub2),
  sum(is.na(GRADE))
)

# Mean BIDING_PRICE for each RESULT_BIDING value, using group_by() and
# summarise().

# Coerce BIDING_PRICE to double before averaging it.
POP_PRICE$BIDING_PRICE <- as.double(POP_PRICE$BIDING_PRICE)

# Group by the category (RESULT_BIDING), not by the price being averaged.
POP_PRICE %>%
  group_by(RESULT_BIDING) %>%
  summarise(AVG_BIDING_PRICE = mean(BIDING_PRICE, na.rm = TRUE))

# Mean BIDING_PRICE for each combination of RESULT_BIDING and ITEM_CATEGORY,
# using group_by() and summarise().
POP_PRICE %>%
  group_by(RESULT_BIDING, ITEM_CATEGORY) %>%
  summarise(
    AVG_BIDING_PRICE = mean(BIDING_PRICE, na.rm = TRUE),
    # Return an ungrouped table instead of one still grouped by RESULT_BIDING.
    .groups = "drop"
  )
# Keep only the POP_PRICE rows whose NUM also occurs in RUSH_FULL$NUM, then
# reduce the table to three columns: BIDING_MONTH (renamed DATA_DATE), NUM,
# and PRICE_GEN (renamed AVM_GENERAL). The result overwrites POP_PRICE.
POP_PRICE <- POP_PRICE %>%
  filter(is.element(NUM, RUSH_FULL$NUM)) %>%
  select(DATA_DATE = BIDING_MONTH, NUM, AVM_GENERAL = PRICE_GEN)
# Add INITIAL_MONTH to RUSH_FULL: INITIAL_DATE with every "-" removed and its
# final two digits replaced by "01".
#
# Regex notes (example strings: "A23", "234", "BC1"):
#   [0-9]{2}   : two digits; with the trailing `$` used below, the string
#                ends in two digits
#   [0-9]{1,2} : a digit occurring at least once and at most twice
#   [0-9]{2,}  : two or more digits
RUSH_FULL <- mutate(
  RUSH_FULL,
  INITIAL_MONTH = str_replace(
    str_remove_all(INITIAL_DATE, "-"),
    "[0-9]{2}$",
    "01"
  )
)

#??

# Earliest INITIAL_DATE for each GRADE, skipping rows whose GRADE is NA or the
# empty string. NA dates are ignored when taking the minimum.
RUSH_FULL_temp <- RUSH_FULL %>%
  filter(!is.na(GRADE), GRADE != "") %>%
  group_by(GRADE) %>%
  summarise(MIN_INIT = min(INITIAL_DATE, na.rm = TRUE))

# Same row filter as RUSH_FULL_temp, but summarised WITHOUT group_by():
# min() is taken over all remaining rows, so every GRADE value is paired with
# the same overall minimum INITIAL_DATE.
# NOTE(review): returning more than one row per group from summarise() is
# deprecated in dplyr >= 1.1 (reframe() is the replacement) - confirm the
# installed dplyr version, and whether a group_by(GRADE) was intended here.
RUSH_FULL_temp2 <- RUSH_FULL %>%
  filter(!((GRADE == "") | is.na(GRADE))) %>%
  summarise(GRADE, MIN_INIT = min(INITIAL_DATE, na.rm = TRUE))

--

--