반응형

     정보

    • 업무명     :  네이버 네모네모로직 사이트 퍼즐 크롤링 및 해결 프로그램

    • 작성자     : 박진만

    • 작성일     : 2020-10-24

    • 설   명      :

    • 수정이력 :

     

     내용

    [요약]

    • 크롤링 대상 사이트 : 네이버 네모네모 로직 사이트 :  nemonemologic.com/

    • 크롤링 내용 : URL 번호를 입력하는 경우 나오는 퍼즐 정보 

    • 사용 프로그램 : R

    • 프로그램 정보 

      • 1. URL 접근 및 퍼즐 정보 GET

      • 2. 행렬 및 퍼즐 내용 입력

      • 3. 퍼즐 풀이 및 시각화

     

    네모네모로직

    네모네모로직 게임 사이트

    nemonemologic.com

     

    [프로그램 설명]

    • 라이브러리 로드

    library(rmarkdown)
    library(dplyr)
    library(dynlm) 
    library(repr) 
    library(gstat) 
    library(stringr) 
    library(xlsx)
    library(XML) 
    library(httr) 
    library(curl)
    library(gtools)
    library(partitions) 
    library(stringr)
    library(data.table)
    library(plot.matrix)

     

    • 서브함수 로드

    #' Enumerate every cell pattern of one nonogram line that satisfies its clue.
    #'
    #' Each clue block becomes a run of "1" cells. Consecutive blocks are separated
    #' by one mandatory "0" cell, and the remaining free "0" cells are distributed
    #' over the gaps (before, between and after the blocks) in every possible way.
    #'
    #' @param number Numeric vector of block lengths for the line, in order. A
    #'   leading 0 means the line has no filled cells.
    #' @param row Length of the line (number of cells).
    #' @return A data.frame with columns V1..V<row> and one row per candidate
    #'   pattern. Cells are the strings "0"/"1"; for an empty line
    #'   (`number[1] == 0`) a single row of numeric zeros is returned.
    pattern_check <- function(number, row) {

      # An empty line has exactly one pattern: every cell blank.
      if (number[1] == 0) {
        return(as.data.frame(matrix(nrow = 1, ncol = row, 0)))
      }

      block_count <- length(number)

      # "1" runs; every block except the last carries its mandatory separator.
      separators <- c(rep("0", block_count - 1), "")
      fill_num <- paste0(strrep("1", number), separators)

      # Blank cells still free once blocks and separators are placed.
      target_0 <- row - sum(nchar(fill_num))
      if (target_0 < 0) {
        stop(
          "clue (", paste(number, collapse = ", "),
          ") does not fit in a line of length ", row,
          call. = FALSE
        )
      }

      # One gap before each block plus one after the last block.
      bang_count <- block_count + 1

      if (target_0 == 0) {
        # Nothing to distribute: the blocks fill the line exactly.
        gaps <- matrix(0L, nrow = 1, ncol = bang_count)
      } else {
        # Every way to split the free cells over the gaps (zeros allowed);
        # compositions() yields one composition per column, so transpose.
        comp <- partitions::compositions(target_0, bang_count)
        gaps <- t(matrix(as.integer(comp), nrow = bang_count))
      }

      # Build the zero runs with strrep() so gap length is not capped by a
      # fixed-size literal.
      zero_runs <- matrix(
        strrep("0", gaps),
        nrow = nrow(gaps),
        ncol = ncol(gaps)
      )

      # Interleave: gap 1, block 1, gap 2, block 2, ..., final gap.
      patterns <- zero_runs[, 1]
      for (k in seq_len(block_count)) {
        patterns <- paste0(patterns, fill_num[k], zero_runs[, k + 1])
      }

      # One character per cell; as.data.frame() names the columns V1..V<row>.
      cells <- do.call(rbind, strsplit(patterns, split = "", fixed = TRUE))
      as.data.frame(cells, stringsAsFactors = FALSE)
    }
    
    
    #' Per-cell fill ratio across the remaining candidate patterns of one line.
    #'
    #' @param df data.frame of candidate patterns whose cell columns are named
    #'   V1..V<row> and hold values that as.integer() turns into 0/1.
    #' @param row Number of cell columns to evaluate.
    #' @return Numeric vector of length `row`; element i is the share of
    #'   candidates whose cell i is filled (1 = filled in all, 0 = in none).
    combin_check <- function(df, row) {
      candidate_count <- nrow(df)

      # seq_len() keeps row = 0 from iterating over c(1, 0); vapply() builds the
      # result in one allocation instead of growing it with append().
      vapply(
        seq_len(row),
        function(i) sum(as.integer(df[[paste0("V", i)]])) / candidate_count,
        numeric(1)
      )
    }

     

    • URL 접근 및 크롤링 정보 파싱

    # Puzzle id sent as the `quid` query parameter of the embed page.
    quid <- 13023
    url <- paste0("http://nemonemologic.com/embed.php?quid=", quid)
    
    thepage <- readLines(url)
    
    # NOTE(review): the puzzle definition is taken from line 13 of the response;
    # that index is hard-coded, so confirm it against the live page.
    if (length(thepage) < 13) {
        stop("expected at least 13 lines from ", url, ", got ", length(thepage),
             call. = FALSE)
    }
    puzzle_info <- thepage[13]
    puzzle_infoL1 <- str_extract_all(puzzle_info, pattern = "\\[(.*?)\\]") # text enclosed in square brackets
    puzzle_infoL2 <- unlist(puzzle_infoL1)
    
    if (length(puzzle_infoL2) == 0) {
        stop("no bracketed clue groups found on line 13 of ", url, call. = FALSE)
    }
    
    length(puzzle_infoL2)
    
    row_line <- data.frame()
    col_line <- data.frame()
    
    # FALSE while groups are appended to col_line; switches to TRUE (and stays
    # TRUE) at the first group after the first whose second character is "[",
    # from which point groups are appended to row_line.
    check <- FALSE
    
    for (i in seq_along(puzzle_infoL2)) {
        
        if (i >= 2 && !check) {
            check <- str_sub(puzzle_infoL2[i], 2, 2) == "["
        }
        
        # Pull the 1-2 character word tokens of the group into a one-row table.
        puzzle_infoL3 <- str_extract_all(puzzle_infoL2[i], pattern = "[\\w]{1,2}")
        
        puzzle_part <- as.data.frame(
            t(str_split(unlist(puzzle_infoL3), pattern = " ", simplify = TRUE)),
            stringsAsFactors = FALSE
        )
        
        # Groups whose first token is not numeric are not clue lists; skip them.
        if (is.na(as.numeric(puzzle_part[1]))) {
            next
        }
        
        if (!check) {
            col_line <- dplyr::bind_rows(col_line, puzzle_part)
        } else {
            row_line <- dplyr::bind_rows(row_line, puzzle_part)
        }
        
    }
    
    # bind_rows() pads shorter clue lists with NA; treat the padding as 0.
    col_line[is.na(col_line)] <- 0
    row_line[is.na(row_line)] <- 0
    
    # One clue list per puzzle row / column.
    row_info <- nrow(row_line)
    col_info <- nrow(col_line)
    
    # Working grid, initialised to 0; the solver later writes 1 and -1 into it.
    puzzle <- matrix(0, nrow = row_info, ncol = col_info)

    목표 사이트의 퍼즐 화면 예시

     

    퍼즐 INFORMATION

     

    파싱 결과

     

    • 퍼즐로부터 ROW / COL 별 경우의 수 확인

    # Build the candidate-pattern table for every line described by `clue_table`.
    #   clue_table  : one clue list per row, padded with 0 on the right
    #   line_length : number of cells in each of those lines
    #   id_col      : name of the column that records which line a pattern
    #                 belongs to ("row_num" or "col_num")
    # Returns all candidate patterns stacked into one data.frame.
    .enumerate_candidates <- function(clue_table, line_length, id_col) {
        parts <- lapply(seq_len(nrow(clue_table)), function(i) {
            clue <- as.numeric(clue_table[i, ])
            
            # A leading 0 marks an empty line; otherwise drop the 0 padding.
            clue <- if (clue[1] == 0) 0 else clue[clue != 0]
            
            part <- pattern_check(row = line_length, number = clue)
            part[[id_col]] <- i
            part
        })
        
        # Bind once at the end instead of growing a data.frame inside a loop.
        if (length(parts) == 0) data.frame() else do.call(rbind, parts)
    }
    
    # A row has col_info cells and a column has row_info cells.
    row_df <- .enumerate_candidates(row_line, col_info, "row_num")
    col_df <- .enumerate_candidates(col_line, row_info, "col_num")
    
    
    checking <- FALSE
    
    # Pass counter; starts at -1 and is incremented at the top of each solver pass.
    im <- -1
    
    # Snapshots the solver restores when it backtracks.
    check_col <- col_df
    check_row <- row_df
    check_puzzle <- puzzle
    ci_check <- 0
    cj_check <- 0
    
    # Counter incremented before each plot of the grid.
    ss <- 0
    
    # Guess depth plus the saved grid and guessed cell for each depth.
    stack <- 1
    stack_save <- array(0, dim = c(100, dim(puzzle)[1], dim(puzzle)[2]))
    ci_stack <- c()
    cj_stack <- c()
    
    # Fill ratios per cell. All three are indexed [row, col] by the solver
    # (percent_col[, i] receives a vector of length row_info, and
    # total_percent[ci, cj] is read with ci over rows and cj over columns), so
    # they must be row_info x col_info for non-square puzzles to work.
    percent_row <- matrix(0, nrow = row_info, ncol = col_info)
    percent_col <- matrix(0, nrow = row_info, ncol = col_info)
    total_percent <- matrix(0, nrow = row_info, ncol = col_info)

    행별 경우의 수 추출 예시

     

    • 퍼즐 풀이 및 시각화

    # Outer solver loop: each pass first prunes the candidate tables against the
    # cells already decided in `puzzle` (1 or -1), then enters the inner loop.
    # NOTE(review): no `break` for this loop is visible in this part of the code;
    # confirm how the outer loop is expected to terminate.
    while(T) {
        
        # Contradiction flags, reset at the start of every pass.
        false_flag_ROW <- F
        false_flag_COL <- F
        
        im <- im + 1
        # Candidate counts before pruning.
        b_row <- dim(row_df)[1]
        b_col <- dim(col_df)[1]
        
        ################## elimination ##################
        for (i in 1:row_info) {
            for (j in 1:col_info) {
                
                
                # Cell decided as 1: keep only candidates with "1" at this cell.
                if(puzzle[i,j] == 1) {
                    
                    ### row ###
                    # Split off row i's candidates, filter on cell j, re-append.
                    row_df_part <- row_df %>%
                        dplyr::filter(row_num == i)
                    
                    row_df <- row_df %>%
                        dplyr::filter(row_num != i)
                    
                    row_df_part <- row_df_part[row_df_part[[paste0("V",j)]] == "1",]
                    
                    row_df <- rbind(row_df,row_df_part)
                    ### row ###
                    
                    ### col ###
                    # Same for column j, filtering on cell i.
                    col_df_part <- col_df %>%
                        dplyr::filter(col_num == j)
                    
                    col_df <- col_df %>%
                        dplyr::filter(col_num != j)
                    
                    col_df_part <- col_df_part[col_df_part[[paste0("V",i)]] == "1",]
                    
                    col_df <- rbind(col_df,col_df_part)
                    ### col ###
                    
                    
                # Cell decided as -1: keep only candidates with "0" at this cell.
                } else if (puzzle[i,j] == -1) {
                    
                    ### row ###
                    row_df_part <- row_df %>%
                        dplyr::filter(row_num == i)
                    
                    row_df <- row_df %>%
                        dplyr::filter(row_num != i)
                    
                    row_df_part <- row_df_part[row_df_part[[paste0("V",j)]] == "0",]
                    
                    row_df <- rbind(row_df,row_df_part)
                    ### row ###
                    
                    ### col ###
                    col_df_part <- col_df %>%
                        dplyr::filter(col_num == j)
                    
                    col_df <- col_df %>%
                        dplyr::filter(col_num != j)
                    
                    col_df_part <- col_df_part[col_df_part[[paste0("V",i)]] == "0",]
                    
                    col_df <- rbind(col_df,col_df_part)
                    ### col ###
                    
                }
                
            }
        }
        ################## elimination ##################
        
        # Candidate counts after pruning.
        a_row <- dim(row_df)[1]
        a_col <- dim(col_df)[1]
        
        
        # Inner solver loop. Each pass:
        #   1. prunes candidates against decided cells (same code as the pass above),
        #   2. derives forced cells from the row candidates, then the column candidates,
        #   3. finishes, guesses a cell, or backtracks depending on progress,
        #   4. repeats the column pass and step 3 ("elimination 2"),
        #   5. plots the grid.
        while(T) {
            
            false_flag_ROW <- F
            false_flag_COL <- F
            
            im <- im + 1
            # Candidate counts before pruning.
            b_row <- dim(row_df)[1]
            b_col <- dim(col_df)[1]
            
            ################## elimination ##################
            for (i in 1:row_info) {
                for (j in 1:col_info) {
                    
                    
                    # Cell decided as 1: keep only candidates with "1" at this cell.
                    if(puzzle[i,j] == 1) {
                        
                        ### row ###
                        row_df_part <- row_df %>%
                            dplyr::filter(row_num == i)
                        
                        row_df <- row_df %>%
                            dplyr::filter(row_num != i)
                        
                        row_df_part <- row_df_part[row_df_part[[paste0("V",j)]] == "1",]
                        
                        row_df <- rbind(row_df,row_df_part)
                        ### row ###
                        
                        ### col ###
                        col_df_part <- col_df %>%
                            dplyr::filter(col_num == j)
                        
                        col_df <- col_df %>%
                            dplyr::filter(col_num != j)
                        
                        col_df_part <- col_df_part[col_df_part[[paste0("V",i)]] == "1",]
                        
                        col_df <- rbind(col_df,col_df_part)
                        ### col ###
                        
                        
                    # Cell decided as -1: keep only candidates with "0" at this cell.
                    } else if (puzzle[i,j] == -1) {
                        
                        ### row ###
                        row_df_part <- row_df %>%
                            dplyr::filter(row_num == i)
                        
                        row_df <- row_df %>%
                            dplyr::filter(row_num != i)
                        
                        row_df_part <- row_df_part[row_df_part[[paste0("V",j)]] == "0",]
                        
                        row_df <- rbind(row_df,row_df_part)
                        ### row ###
                        
                        ### col ###
                        col_df_part <- col_df %>%
                            dplyr::filter(col_num == j)
                        
                        col_df <- col_df %>%
                            dplyr::filter(col_num != j)
                        
                        col_df_part <- col_df_part[col_df_part[[paste0("V",i)]] == "0",]
                        
                        col_df <- rbind(col_df,col_df_part)
                        ### col ###
                        
                    }
                    
                }
            }
            ################## elimination ##################
            
            # Candidate counts after pruning.
            a_row <- dim(row_df)[1]
            a_col <- dim(col_df)[1]
            
            ###################### row ###########################
            for (i in 1:row_info) {
                
                false_flag <- F
                
                row_df_part <- row_df %>%
                    dplyr::filter(row_num == i)
                
                ## TEST ##
                # NOTE(review): the subset below is never assigned, so this loop
                # has no effect on row_df_part.
                for (j in 1:row_info) {
                    row_df_part[row_df_part[[paste0("V",i)]] == puzzle[i,j],]
                }
                ## TEST ##
                
                ## contradiction check ##
                # No candidate left for row i: flag it and leave the row loop.
                if(dim(row_df_part)[1] == 0) {
                    false_flag_ROW <- T
                    break
                }
                ## contradiction check ##
                
                # Share of row i's candidates that fill each cell.
                percent <- combin_check(df = row_df_part,row = col_info)
                percent_row[i,] <- percent
                
                
                for (j in 1:length(percent)) {
                    
                    # Filled in every candidate -> cell is 1.
                    if(percent[j] == 1){
                        puzzle[i,j] <- 1
                        
                        #row_df_part <- row_df_part[row_df_part[[paste0("V",j)]] == "1",]
                        
                    # Filled in no candidate -> cell is -1.
                    } else if (percent[j] == 0) {
                        puzzle[i,j] <- -1
                        #print(puzzle[i,j])
                        
                    } else {
                        
                        # Undetermined by this row: keep an existing 1/-1,
                        # otherwise mark the cell 0.
                        if(puzzle[i,j] == -1 | puzzle[i,j] == 1) {
                            puzzle[i,j] <- puzzle[i,j]
                        } else {
                            puzzle[i,j] <- 0
                        }
                        
                    }
                    
                    
                }
                
                
            }
            ###################### row ###########################
            
            ###################### col ###########################
            for (i in 1:col_info) {
                
                false_flag <- F
                
                col_df_part <- col_df %>%
                    dplyr::filter(col_num == i)
                
                
                ## TEST ##
                # NOTE(review): result discarded, as in the row pass; no effect.
                for (j in 1:row_info) {
                    col_df_part[col_df_part[[paste0("V",i)]] == puzzle[j,i],]
                }
                
                ## contradiction check ##
                # No candidate left for column i: flag it and leave the column loop.
                if(dim(col_df_part)[1] == 0) {
                    false_flag_COL <- T
                    break
                }
                ## contradiction check ##
                
                # Share of column i's candidates that fill each cell.
                percent <- combin_check(df = col_df_part,row = row_info)
                percent_col[,i] <- percent
                
                #print(percent)
                for (j in 1:length(percent)) {
                    
                    if(percent[j] == 1){
                        
                        puzzle[j,i] <- 1
                        
                    } else if (percent[j] == 0) {
                        
                        puzzle[j,i] <- -1
                        
                    } else {
                        
                        # Undetermined by this column: keep an existing 1/-1,
                        # otherwise mark the cell 0.
                        if(puzzle[j,i] == -1 | puzzle[j,i] == 1) {
                            puzzle[j,i] <- puzzle[j,i]
                        } else {
                            puzzle[j,i] <- 0
                        }
                        
                    }
                    
                }
                
                #print(percent)
                #print(max(percent))
            }
            
            
            # Number of candidates removed by this pass's pruning.
            row_dis <- b_row - a_row
            col_dis <- b_col - a_col
            
            # print(percent_row)
            # print(percent_col)
            # Candidate totals equal the number of rows and columns: stop this loop.
            if(a_row == row_info & a_col == col_info) {
                stack <- 0
                print(paste0("stack : ",stack))
                break
            # No candidate was removed and no contradiction was found: guess a cell.
            } else if (row_dis == 0 & col_dis == 0 & im != 0 & false_flag_ROW == F & false_flag_COL == F) {
                
                stack <- stack + 1
                print(paste0("stack : ",stack))
                
                
                # The grid is saved at every depth; the candidate tables are
                # snapshotted only when stack == 2.
                if(stack == 2){
                    
                    check_col <- col_df
                    check_row <- row_df
                    stack_save[stack,,] <- puzzle
                    
                } else {
                    stack_save[stack,,] <- puzzle
                }
                
                check_col_stack <- col_df
                check_row_stack <- row_df
                check_puzzle_stack <- puzzle
                
                count <- 0 
                checking <- F
                
                
                
                # Per cell, the larger of the row-based and column-based fill ratios.
                for (ci in 1:row_info) {
                    for (cj in 1:col_info) {
                        
                        
                        total_percent[ci,cj] <- max(percent_row[ci,cj],percent_col[ci,cj])
                        
                    }
                }
                
                
                # Exclude cells whose ratio is exactly 1 from the selection.
                total_percent <- ifelse(total_percent == 1.0,-999,total_percent)
                
                # First cell (in which() order) holding the maximum remaining ratio.
                cicj <- which(total_percent == max(total_percent), arr.ind = TRUE)
                cicj <- as.data.frame(cicj)
                
                ci <- cicj$row[1]
                cj <- cicj$col[1]
                print(paste0("fix",ci," ",cj))
                print(paste0("percent : ",total_percent[ci,cj]))
                # Guess: set the chosen cell to 1.
                check_puzzle_stack[ci,cj] = 1
                ci_check <- ci
                cj_check <- cj
                
                col_df <- check_col_stack
                row_df <- check_row_stack
                puzzle <- check_puzzle_stack
                puzzle[ci_check,cj_check] <- 1
                checking <- F
                
                # Remember the guessed cell for this depth.
                ci_stack[stack] <- ci_check
                cj_stack[stack] <- cj_check
                
            }
            
            
            # A row or column ran out of candidates: undo the latest guess.
            if (false_flag_ROW == T | false_flag_COL == T) {
                
                stack <- stack - 1
                print("logical error exist! go back checkpoint!!")
                print(paste0("stack : ",stack))
                # Candidate tables return to the snapshot taken when stack == 2.
                col_df <- check_col
                row_df <- check_row
                # NOTE(review): if stack has reached 0 here, stack_save[0,,]
                # selects no slice - confirm this case cannot occur.
                puzzle <- stack_save[stack,,]
                #puzzle[ci_check,cj_check] <- -1
                checking <- F
                
                # The guess at depth stack+1 is flipped to -1; the cells recorded
                # at lower depths are set to 1 again.
                # NOTE(review): ci_stack[1]/cj_stack[1] appear never to be assigned
                # when the first guess is stored at depth 2 - confirm the k == 1
                # iteration is harmless.
                for (k in seq(stack+1,1,-1)) {
                    
                    if(k == stack+1) {puzzle[ci_stack[k],cj_stack[k]] <- -1}
                    else {puzzle[ci_stack[k],cj_stack[k]] <- 1}
                }
                
                stack_save[stack,,] <- puzzle
                
            }
            ################## elimination 2 ##################
            
            
            # From here to the second "elimination 2" marker the column pass and
            # the finish/guess/backtrack logic above are repeated.
            ###################### col ###########################
            for (i in 1:col_info) {
                
                false_flag <- F
                
                col_df_part <- col_df %>%
                    dplyr::filter(col_num == i)
                
                
                ## TEST ##
                # NOTE(review): result discarded; no effect.
                for (j in 1:row_info) {
                    col_df_part[col_df_part[[paste0("V",i)]] == puzzle[j,i],]
                }
                
                ## contradiction check ##
                if(dim(col_df_part)[1] == 0) {
                    false_flag_COL <- T
                    break
                }
                ## contradiction check ##
                
                percent <- combin_check(df = col_df_part,row = row_info)
                percent_col[,i] <- percent
                
                #print(percent)
                for (j in 1:length(percent)) {
                    
                    if(percent[j] == 1){
                        
                        puzzle[j,i] <- 1
                        
                    } else if (percent[j] == 0) {
                        
                        puzzle[j,i] <- -1
                        
                    } else {
                        
                        if(puzzle[j,i] == -1 | puzzle[j,i] == 1) {
                            puzzle[j,i] <- puzzle[j,i]
                        } else {
                            puzzle[j,i] <- 0
                        }
                        
                    }
                    
                }
                
                #print(percent)
                #print(max(percent))
            }
            
            
            # Recomputed from the same b_*/a_* counts as above.
            row_dis <- b_row - a_row
            col_dis <- b_col - a_col
            
            # print(percent_row)
            # print(percent_col)
            
            # Candidate totals equal the number of rows and columns: stop this loop.
            if(a_row == row_info & a_col == col_info) {
                stack <- 0
                print(paste0("stack : ",stack))
                break
            # No candidate was removed and no contradiction was found: guess a cell.
            } else if (row_dis == 0 & col_dis == 0 & im != 0 & false_flag_ROW == F & false_flag_COL == F) {
                
                stack <- stack + 1
                print(paste0("stack : ",stack))
                
                
                if(stack == 2){
                    
                    check_col <- col_df
                    check_row <- row_df
                    stack_save[stack,,] <- puzzle
                    
                } else {
                    stack_save[stack,,] <- puzzle
                }
                
                check_col_stack <- col_df
                check_row_stack <- row_df
                check_puzzle_stack <- puzzle
                
                count <- 0 
                checking <- F
                
                
                
                for (ci in 1:row_info) {
                    for (cj in 1:col_info) {
                        
                        
                        total_percent[ci,cj] <- max(percent_row[ci,cj],percent_col[ci,cj])
                        
                    }
                }
                
                
                total_percent <- ifelse(total_percent == 1.0,-999,total_percent)
                
                cicj <- which(total_percent == max(total_percent), arr.ind = TRUE)
                cicj <- as.data.frame(cicj)
                
                ci <- cicj$row[1]
                cj <- cicj$col[1]
                print(paste0("fix",ci," ",cj))
                print(paste0("percent : ",total_percent[ci,cj]))
                check_puzzle_stack[ci,cj] = 1
                ci_check <- ci
                cj_check <- cj
                
                col_df <- check_col_stack
                row_df <- check_row_stack
                puzzle <- check_puzzle_stack
                puzzle[ci_check,cj_check] <- 1
                checking <- F
                
                ci_stack[stack] <- ci_check
                cj_stack[stack] <- cj_check
                
            }
            
            
            # A row or column ran out of candidates: undo the latest guess.
            if (false_flag_ROW == T | false_flag_COL == T) {
                
                stack <- stack - 1
                print("logical error exist! go back checkpoint!!")
                print(paste0("stack : ",stack))
                col_df <- check_col
                row_df <- check_row
                puzzle <- stack_save[stack,,]
                #puzzle[ci_check,cj_check] <- -1
                checking <- F
                
                for (k in seq(stack+1,1,-1)) {
                    
                    if(k == stack+1) {puzzle[ci_stack[k],cj_stack[k]] <- -1}
                    else {puzzle[ci_stack[k],cj_stack[k]] <- 1}
                }
                
                stack_save[stack,,] <- puzzle
                
            }
            ################## elimination 2 ##################
            # Draw the current grid; the commented jpeg()/dev.off() pair would
            # write each frame to result_<ss>.jpg instead.
            ss <- ss + 1
            # jpeg(paste0('result_',ss,'.jpg'),width = 800,height = 800)
            plot(puzzle,col=c('gray', 'white','black'), key=NULL, axis.col=NULL, axis.row=NULL, xlab='', ylab='')
            # dev.off()
            
        }

     

    • 풀이 결과

     

     참고 문헌

    [논문]

    • 없음

    [보고서]

    • 없음

    [URL]

    • 없음

     

     문의사항

    [기상학/프로그래밍 언어]

    • sangho.lee.1990@gmail.com

    [해양학/천문학/빅데이터]

    • saimang0804@gmail.com

      # Advance the plot counter and draw the current grid; the commented
      # jpeg()/dev.off() pair would write the frame to result_<ss>.jpg instead.
      ss <- ss + 1
      # jpeg(paste0('result_',ss,'.jpg'),width = 800,height = 800)
      plot(puzzle,col=c('gray', 'white','black'), key=NULL, axis.col=NULL, axis.row=NULL, xlab='', ylab='')
      # dev.off()
     
    }
    반응형
    • 네이버 블로그 공유하기
    • 네이버 밴드에 공유하기
    • 페이스북 공유하기
    • 카카오스토리 공유하기