정보
-
업무명 : 네이버 네모네모로직 사이트 퍼즐 크롤링 및 해결 프로그램
-
작성자 : 박진만
-
작성일 : 2020-10-24
-
설 명 :
-
수정이력 :
내용
[요약]
-
크롤링 대상 사이트 : 네이버 네모네모로직 사이트 (nemonemologic.com/)
-
크롤링 내용 : URL 번호를 입력하는 경우 나오는 퍼즐 정보
-
사용 프로그램 : R
-
프로그램 정보
-
1. URL 접근 및 퍼즐 정보 GET
-
2. 행렬 및 퍼즐 내용 입력
-
3. 퍼즐 풀이 및 시각화
-
[프로그램 설명]
-
라이브러리 로드
library(rmarkdown)
library(dplyr)
library(dynlm)
library(repr)
library(gstat)
library(stringr)
library(xlsx)
library(XML)
library(httr)
library(curl)
library(gtools)
library(partitions)
library(stringr)
library(data.table)
library(plot.matrix)
-
서브함수 로드
#' Enumerate every placement of a nonogram clue on a single line.
#'
#' The placements are generated with a stars-and-bars enumeration in base R,
#' so the line length is not limited by any fixed-size padding string.
#'
#' @param number Numeric vector of block lengths in left-to-right order.
#'   If the first element is 0 the line is treated as completely empty.
#'   Any other zero entries are ignored.
#' @param row Number of cells in the line.
#' @return A data.frame with one row per valid placement and columns
#'   `V1` .. `V<row>`. Cells are the strings "1" (filled) and "0" (empty).
#'   For an empty clue a single row of numeric zeros is returned, as before.
#'   The order of the rows is unspecified.
pattern_check <- function(number, row) {
  # An empty clue has exactly one solution: a fully blank line.
  if (number[1] == 0) {
    return(as.data.frame(matrix(0, nrow = 1, ncol = row)))
  }

  blocks <- number[number > 0]
  n_blocks <- length(blocks)

  # Cells that remain once every block and the single mandatory gap between
  # neighbouring blocks have been placed.
  free_cells <- row - (sum(blocks) + n_blocks - 1)
  if (free_cells < 0) {
    stop(
      "clue ", paste(blocks, collapse = " "),
      " does not fit in a line of length ", row,
      call. = FALSE
    )
  }

  # Stars and bars: choose which of the (free_cells + n_blocks) slots hold a
  # block; the free cells before each chosen slot become the leading gap.
  bar_pos <- matrix(
    utils::combn(free_cells + n_blocks, n_blocks),
    nrow = n_blocks
  )

  # Start cell of block b = its slot position + total length of the blocks
  # before it (the vector is recycled down each column of the matrix).
  offsets <- cumsum(blocks) - blocks
  starts <- bar_pos + offsets

  n_placements <- ncol(starts)
  placement_id <- seq_len(n_placements)
  cells <- matrix("0", nrow = n_placements, ncol = row)
  for (b in seq_len(n_blocks)) {
    for (shift in seq_len(blocks[b]) - 1) {
      cells[cbind(placement_id, starts[b, ] + shift)] <- "1"
    }
  }

  as.data.frame(cells, stringsAsFactors = FALSE)
}
#' Share of candidate patterns that fill each cell of a line.
#'
#' @param df Data frame of candidate patterns with columns `V1`, `V2`, ...
#'   whose values can be coerced to integer 0/1.
#' @param row Number of cells (columns `V1` .. `V<row>`) to evaluate.
#' @return Numeric vector of length `row`; element i is the sum of column
#'   `V<i>` divided by the number of candidate rows in `df`.
combin_check <- function(df, row) {
  n_candidates <- nrow(df)
  # seq_len() keeps the result empty for row == 0 instead of looping over 1:0.
  vapply(
    seq_len(row),
    function(i) sum(as.integer(df[[paste0("V", i)]])) / n_candidates,
    numeric(1)
  )
}
-
URL 접근 및 크롤링 정보 파싱
# Download the embed page of one puzzle (id 13023 is hard-coded) and parse
# its clue lists into `col_line` / `row_line`, then allocate the empty board.
url <- paste0("http://nemonemologic.com/embed.php?quid=",13023)
thepage = readLines (url)
# Only line 13 of the response is inspected.
# NOTE(review): presumably the line that carries the puzzle data - confirm
# against the current page layout.
puzzle_info <- thepage[13]
puzzle_infoL1 <- str_extract_all(puzzle_info, pattern="\\[(.*?)\\]") # extract the text inside square brackets (non-greedy)
puzzle_infoL2 <- unlist(puzzle_infoL1)
length(puzzle_infoL2)
row_line <- data.frame()
col_line <- data.frame()
# `check` stays FALSE while chunks are appended to col_line and switches to
# TRUE (for the rest of the loop) at the first chunk, from the second one on,
# whose second character is "[".
check <- FALSE
for (i in 1:length(puzzle_infoL2)) {
if(i >= 2 & check == FALSE) {check <- ifelse(str_sub(puzzle_infoL2[i],2,2) == "[",TRUE,FALSE)}
# Tokens are runs of one or two word characters.
# NOTE(review): a clue with three or more digits would be split - confirm
# that such clues cannot occur.
puzzle_infoL3 <- str_extract_all(puzzle_infoL2[i], pattern="[\\w]{1,2}")
# One-row data frame holding the tokens of this chunk.
puzzle_part <- as.data.frame(t(str_split(unlist(puzzle_infoL3),pattern = " ",simplify = TRUE)),stringsAsFactors = F)
# Skip chunks whose first token is not numeric.
if(is.na(as.numeric(puzzle_part[1]))){
next
}
if(check == FALSE) {
col_line <- dplyr::bind_rows(col_line,puzzle_part)
} else {
row_line <- dplyr::bind_rows(row_line,puzzle_part)
}
}
# bind_rows pads shorter clue lists with NA; replace that padding with 0.
col_line[is.na(col_line)] = 0
row_line[is.na(row_line)] = 0
# Board size: one board row per entry of row_line, one column per entry of
# col_line.
row_info <- dim(row_line)[1]
col_info <- dim(col_line)[1]
# Board filled with 0; the solver later writes 1 and -1 into it.
puzzle <- matrix(nrow = row_info, ncol = col_info,0)
-
퍼즐로부터 ROW / COL 별 경우의 수 확인
# Candidate patterns for every board row. Each clue is expanded by
# pattern_check() into all placements on a line of `col_info` cells, and the
# rows are tagged with the board row they belong to.
row_df <- data.frame()
for (i in seq_len(row_info)) {
  row_number <- as.numeric(row_line[i, ])
  if (row_number[1] == 0) {
    # A leading 0 marks an empty line.
    row_number <- 0
  } else {
    # Drop the 0 padding added while parsing.
    row_number <- row_number[row_number != 0]
  }
  row_df_part <- pattern_check(row = col_info, number = row_number)
  row_df_part[["row_num"]] <- i
  row_df <- rbind(row_df, row_df_part)
}

# Candidate patterns for every board column (line length is `row_info`).
col_df <- data.frame()
for (i in seq_len(col_info)) {
  col_number <- as.numeric(col_line[i, ])
  if (col_number[1] == 0) {
    col_number <- 0
  } else {
    col_number <- col_number[col_number != 0]
  }
  col_df_part <- pattern_check(row = row_info, number = col_number)
  col_df_part[["col_num"]] <- i
  col_df <- rbind(col_df, col_df_part)
}

# Solver state.
checking <- FALSE
im <- -1                 # iteration counter, incremented at the top of the loop
check_col <- col_df      # candidate snapshots restored when backtracking
check_row <- row_df
check_puzzle <- puzzle
ci_check <- 0            # coordinates of the most recent guess
cj_check <- 0
ss <- 0                  # plot counter
stack <- 1               # current guess depth
# Board snapshot per guess depth; the depth is capped at 100 slots.
stack_save <- array(0, dim = c(100, dim(puzzle)[1], dim(puzzle)[2]))
ci_stack <- c()          # guessed cell coordinates, indexed by depth
cj_stack <- c()
# Per-cell fill ratios computed by the row pass and the column pass.
percent_row <- matrix(nrow = row_info, ncol = col_info, 0)
percent_col <- matrix(nrow = col_info, ncol = row_info, 0)
total_percent <- matrix(nrow = col_info, ncol = row_info, 0)
-
퍼즐 풀이 및 시각화
# Main solver loop: repeatedly (1) discard candidate patterns that contradict
# already decided cells, (2) decide cells on which all remaining candidates
# of a row / column agree, (3) guess a cell when no progress is made and
# (4) roll back when a guess leads to a contradiction.
# Board encoding used below: 1 = filled, -1 = empty, 0 = undecided.
# NOTE(review): this loop header and the elimination pass that follows are
# repeated verbatim by the nested `while(T)` right after them; this looks like
# a copy/paste duplication - confirm against the original script.
while(T) {
false_flag_ROW <- F
false_flag_COL <- F
im <- im + 1
# Candidate counts before elimination.
b_row <- dim(row_df)[1]
b_col <- dim(col_df)[1]
################## elimination ##################
for (i in 1:row_info) {
for (j in 1:col_info) {
if(puzzle[i,j] == 1) {
# Cell is filled: keep only candidates of row i / column j that have "1"
# at this position.
### row ###
row_df_part <- row_df %>%
dplyr::filter(row_num == i)
row_df <- row_df %>%
dplyr::filter(row_num != i)
row_df_part <- row_df_part[row_df_part[[paste0("V",j)]] == "1",]
row_df <- rbind(row_df,row_df_part)
### row ###
### col ###
col_df_part <- col_df %>%
dplyr::filter(col_num == j)
col_df <- col_df %>%
dplyr::filter(col_num != j)
col_df_part <- col_df_part[col_df_part[[paste0("V",i)]] == "1",]
col_df <- rbind(col_df,col_df_part)
### col ###
} else if (puzzle[i,j] == -1) {
# Cell is empty: keep only candidates that have "0" at this position.
### row ###
row_df_part <- row_df %>%
dplyr::filter(row_num == i)
row_df <- row_df %>%
dplyr::filter(row_num != i)
row_df_part <- row_df_part[row_df_part[[paste0("V",j)]] == "0",]
row_df <- rbind(row_df,row_df_part)
### row ###
### col ###
col_df_part <- col_df %>%
dplyr::filter(col_num == j)
col_df <- col_df %>%
dplyr::filter(col_num != j)
col_df_part <- col_df_part[col_df_part[[paste0("V",i)]] == "0",]
col_df <- rbind(col_df,col_df_part)
### col ###
}
}
}
################## elimination ##################
# Candidate counts after elimination.
a_row <- dim(row_df)[1]
a_col <- dim(col_df)[1]
# Nested loop: same header and elimination pass as above, followed by the
# row / column passes, the guessing step and the rollback step.
while(T) {
false_flag_ROW <- F
false_flag_COL <- F
im <- im + 1
b_row <- dim(row_df)[1]
b_col <- dim(col_df)[1]
################## elimination ##################
for (i in 1:row_info) {
for (j in 1:col_info) {
if(puzzle[i,j] == 1) {
### row ###
row_df_part <- row_df %>%
dplyr::filter(row_num == i)
row_df <- row_df %>%
dplyr::filter(row_num != i)
row_df_part <- row_df_part[row_df_part[[paste0("V",j)]] == "1",]
row_df <- rbind(row_df,row_df_part)
### row ###
### col ###
col_df_part <- col_df %>%
dplyr::filter(col_num == j)
col_df <- col_df %>%
dplyr::filter(col_num != j)
col_df_part <- col_df_part[col_df_part[[paste0("V",i)]] == "1",]
col_df <- rbind(col_df,col_df_part)
### col ###
} else if (puzzle[i,j] == -1) {
### row ###
row_df_part <- row_df %>%
dplyr::filter(row_num == i)
row_df <- row_df %>%
dplyr::filter(row_num != i)
row_df_part <- row_df_part[row_df_part[[paste0("V",j)]] == "0",]
row_df <- rbind(row_df,row_df_part)
### row ###
### col ###
col_df_part <- col_df %>%
dplyr::filter(col_num == j)
col_df <- col_df %>%
dplyr::filter(col_num != j)
col_df_part <- col_df_part[col_df_part[[paste0("V",i)]] == "0",]
col_df <- rbind(col_df,col_df_part)
### col ###
}
}
}
################## elimination ##################
a_row <- dim(row_df)[1]
a_col <- dim(col_df)[1]
###################### row ###########################
# Row pass: for each board row, compute the share of remaining candidates
# that fill each cell and fix the cells on which all candidates agree.
for (i in 1:row_info) {
false_flag <- F
row_df_part <- row_df %>%
dplyr::filter(row_num == i)
## TEST ##
# NOTE(review): the subset computed in this loop is not assigned, so the
# loop has no effect on row_df_part.
for (j in 1:row_info) {
row_df_part[row_df_part[[paste0("V",i)]] == puzzle[i,j],]
}
## TEST ##
## contradiction check ##
# No candidate left for this row: flag it and leave the row pass.
if(dim(row_df_part)[1] == 0) {
false_flag_ROW <- T
break
}
## contradiction check ##
percent <- combin_check(df = row_df_part,row = col_info)
percent_row[i,] <- percent
for (j in 1:length(percent)) {
if(percent[j] == 1){
# Every candidate fills this cell.
puzzle[i,j] <- 1
#row_df_part <- row_df_part[row_df_part[[paste0("V",j)]] == "1",]
} else if (percent[j] == 0) {
# No candidate fills this cell.
puzzle[i,j] <- -1
#print(puzzle[i,j])
} else {
# Candidates disagree: keep an already decided value, otherwise undecided.
if(puzzle[i,j] == -1 | puzzle[i,j] == 1) {
puzzle[i,j] <- puzzle[i,j]
} else {
puzzle[i,j] <- 0
}
}
}
}
###################### row ###########################
###################### col ###########################
# Column pass: same procedure with rows and columns swapped.
for (i in 1:col_info) {
false_flag <- F
col_df_part <- col_df %>%
dplyr::filter(col_num == i)
## TEST ##
# NOTE(review): result is discarded here as well, so this loop is a no-op.
for (j in 1:row_info) {
col_df_part[col_df_part[[paste0("V",i)]] == puzzle[j,i],]
}
## contradiction check ##
if(dim(col_df_part)[1] == 0) {
false_flag_COL <- T
break
}
## contradiction check ##
percent <- combin_check(df = col_df_part,row = row_info)
# NOTE(review): `percent` has row_info entries and is written into column i
# (i up to col_info) of percent_col; this appears to rely on a square board -
# confirm for non-square puzzles.
percent_col[,i] <- percent
#print(percent)
for (j in 1:length(percent)) {
if(percent[j] == 1){
puzzle[j,i] <- 1
} else if (percent[j] == 0) {
puzzle[j,i] <- -1
} else {
if(puzzle[j,i] == -1 | puzzle[j,i] == 1) {
puzzle[j,i] <- puzzle[j,i]
} else {
puzzle[j,i] <- 0
}
}
}
#print(percent)
#print(max(percent))
}
# Number of candidates removed by the elimination pass of this iteration.
row_dis <- b_row - a_row
col_dis <- b_col - a_col
# print(percent_row)
# print(percent_col)
# Exactly one candidate left per row and per column: stop the loop.
if(a_row == row_info & a_col == col_info) {
stack <- 0
print(paste0("stack : ",stack))
break
} else if (row_dis == 0 & col_dis == 0 & im != 0 & false_flag_ROW == F & false_flag_COL == F) {
# No candidate was removed and no contradiction was found: make a guess.
stack <- stack + 1
print(paste0("stack : ",stack))
# The candidate tables are snapshotted only when the depth becomes 2; the
# board is snapshotted at every depth.
if(stack == 2){
check_col <- col_df
check_row <- row_df
stack_save[stack,,] <- puzzle
} else {
stack_save[stack,,] <- puzzle
}
check_col_stack <- col_df
check_row_stack <- row_df
check_puzzle_stack <- puzzle
count <- 0
checking <- F
# Per-cell maximum of the row-based and column-based ratios.
for (ci in 1:row_info) {
for (cj in 1:col_info) {
total_percent[ci,cj] <- max(percent_row[ci,cj],percent_col[ci,cj])
}
}
# Mask cells whose ratio is exactly 1, then pick the first cell holding the
# largest remaining ratio.
total_percent <- ifelse(total_percent == 1.0,-999,total_percent)
cicj <- which(total_percent == max(total_percent), arr.ind = TRUE)
cicj <- as.data.frame(cicj)
ci <- cicj$row[1]
cj <- cicj$col[1]
print(paste0("fix",ci," ",cj))
print(paste0("percent : ",total_percent[ci,cj]))
# Guess: mark the chosen cell as filled and remember it for this depth.
check_puzzle_stack[ci,cj] = 1
ci_check <- ci
cj_check <- cj
col_df <- check_col_stack
row_df <- check_row_stack
puzzle <- check_puzzle_stack
puzzle[ci_check,cj_check] <- 1
checking <- F
ci_stack[stack] <- ci_check
cj_stack[stack] <- cj_check
}
# Rollback: a row or column ran out of candidates.
if (false_flag_ROW == T | false_flag_COL == T) {
stack <- stack - 1
print("logical error exist! go back checkpoint!!")
print(paste0("stack : ",stack))
# Restore the candidate snapshots and the board saved for this depth.
col_df <- check_col
row_df <- check_row
puzzle <- stack_save[stack,,]
#puzzle[ci_check,cj_check] <- -1
checking <- F
# The most recent guess (depth stack + 1) is marked empty; the guesses
# recorded at lower depths are re-applied as filled.
for (k in seq(stack+1,1,-1)) {
if(k == stack+1) {puzzle[ci_stack[k],cj_stack[k]] <- -1}
else {puzzle[ci_stack[k],cj_stack[k]] <- 1}
}
stack_save[stack,,] <- puzzle
}
################## elimination 2 ##################
# NOTE(review): everything between the two "elimination 2" markers repeats
# the column pass, the stop / guess step and the rollback step above without
# changes; possibly an accidental duplication - confirm.
###################### col ###########################
for (i in 1:col_info) {
false_flag <- F
col_df_part <- col_df %>%
dplyr::filter(col_num == i)
## TEST ##
for (j in 1:row_info) {
col_df_part[col_df_part[[paste0("V",i)]] == puzzle[j,i],]
}
## contradiction check ##
if(dim(col_df_part)[1] == 0) {
false_flag_COL <- T
break
}
## contradiction check ##
percent <- combin_check(df = col_df_part,row = row_info)
percent_col[,i] <- percent
#print(percent)
for (j in 1:length(percent)) {
if(percent[j] == 1){
puzzle[j,i] <- 1
} else if (percent[j] == 0) {
puzzle[j,i] <- -1
} else {
if(puzzle[j,i] == -1 | puzzle[j,i] == 1) {
puzzle[j,i] <- puzzle[j,i]
} else {
puzzle[j,i] <- 0
}
}
}
#print(percent)
#print(max(percent))
}
row_dis <- b_row - a_row
col_dis <- b_col - a_col
# print(percent_row)
# print(percent_col)
if(a_row == row_info & a_col == col_info) {
stack <- 0
print(paste0("stack : ",stack))
break
} else if (row_dis == 0 & col_dis == 0 & im != 0 & false_flag_ROW == F & false_flag_COL == F) {
stack <- stack + 1
print(paste0("stack : ",stack))
if(stack == 2){
check_col <- col_df
check_row <- row_df
stack_save[stack,,] <- puzzle
} else {
stack_save[stack,,] <- puzzle
}
check_col_stack <- col_df
check_row_stack <- row_df
check_puzzle_stack <- puzzle
count <- 0
checking <- F
for (ci in 1:row_info) {
for (cj in 1:col_info) {
total_percent[ci,cj] <- max(percent_row[ci,cj],percent_col[ci,cj])
}
}
total_percent <- ifelse(total_percent == 1.0,-999,total_percent)
cicj <- which(total_percent == max(total_percent), arr.ind = TRUE)
cicj <- as.data.frame(cicj)
ci <- cicj$row[1]
cj <- cicj$col[1]
print(paste0("fix",ci," ",cj))
print(paste0("percent : ",total_percent[ci,cj]))
check_puzzle_stack[ci,cj] = 1
ci_check <- ci
cj_check <- cj
col_df <- check_col_stack
row_df <- check_row_stack
puzzle <- check_puzzle_stack
puzzle[ci_check,cj_check] <- 1
checking <- F
ci_stack[stack] <- ci_check
cj_stack[stack] <- cj_check
}
if (false_flag_ROW == T | false_flag_COL == T) {
stack <- stack - 1
print("logical error exist! go back checkpoint!!")
print(paste0("stack : ",stack))
col_df <- check_col
row_df <- check_row
puzzle <- stack_save[stack,,]
#puzzle[ci_check,cj_check] <- -1
checking <- F
for (k in seq(stack+1,1,-1)) {
if(k == stack+1) {puzzle[ci_stack[k],cj_stack[k]] <- -1}
else {puzzle[ci_stack[k],cj_stack[k]] <- 1}
}
stack_save[stack,,] <- puzzle
}
################## elimination 2 ##################
# Draw the current board after every iteration, using one colour for each of
# the three cell values; `ss` counts the plots (used by the commented-out
# jpeg export).
ss <- ss + 1
# jpeg(paste0('result_',ss,'.jpg'),width = 800,height = 800)
plot(puzzle,col=c('gray', 'white','black'), key=NULL, axis.col=NULL, axis.row=NULL, xlab='', ylab='')
# dev.off()
}
-
풀이 결과
참고 문헌
[논문]
- 없음
[보고서]
- 없음
[URL]
- 없음
문의사항
[기상학/프로그래밍 언어]
- sangho.lee.1990@gmail.com
[해양학/천문학/빅데이터]
- saimang0804@gmail.com
# End-of-iteration step: bump the plot counter and draw the current board.
# NOTE(review): these statements repeat the ones that end the nested solver
# loop; the closing brace below appears to close the outer `while(T)` -
# confirm against the original script.
ss <- ss + 1
# jpeg(paste0('result_',ss,'.jpg'),width = 800,height = 800)
plot(puzzle,col=c('gray', 'white','black'), key=NULL, axis.col=NULL, axis.row=NULL, xlab='', ylab='')
# dev.off()
}
'프로그래밍 언어 > R' 카테고리의 다른 글
[R] 특정 날짜의 네이버 뉴스 제목 크롤링 (0) | 2020.11.26 |
---|---|
[R] 한국 주요 좌표계에 대한 proj4 인자 및 지도 예시 (0) | 2020.10.25 |
[R] R 및 Python을 이용한 '디시인사이드' MBTI 갤러리 웹 크롤링 및 키워드 분석을 통한 워드 클라우드 생성 (1) | 2020.05.23 |
[R] R을 이용한 수치해석 : 2020년 대학수학능력시험 (수능) 가형 기출문제 (0) | 2020.05.18 |
[R] 서울대 통계 연구소 R을 이용한 빅데이터 분석 교육 연수 (0) | 2020.05.12 |
최근댓글