HENU编译原理实验一 词法分析器代码

发布时间 2023-06-26 00:24:54 作者: ccz9729

词法分析器

#include <bits/stdc++.h>
using namespace std;


// Lexer tables. `key` and `symbol` are filled by init(); `identify` grows as
// LexicalAnalysis() meets identifiers it has not seen before.
vector<string> key, identify, symbol;
// Spelling -> 0-based position of that spelling in the matching vector above.
unordered_map<string, int> key_map, identify_map, symbol_map; // value
// Keyword/symbol spelling -> token type code (position plus the offset below).
unordered_map<string, int> letter_map; // Type

// Type code of the first keyword: init() gives keyword i the code i + key_offset.
int key_offset = 1;
// Type code of the first symbol: init() gives symbol i the code i + symbol_offset.
// init() registers 11 keywords (codes 1..11), so 12 is the next unused code.
int symbol_offset = 12;

// One token produced by the lexer, printed by main() as "(type,val)".
//   type - numeric token class: a keyword/symbol code taken from letter_map,
//          identify_type for identifiers, numbers_type for number literals.
//   val  - attribute: "-" for keywords and symbols, the identifier's table
//          position (as decimal text) for identifiers, the digits for numbers.
// The struct stays an aggregate so `{type, val}` brace-init and structured
// bindings keep working; `type` is zero-initialised so a default-constructed
// node never carries an indeterminate value.
struct node {
  int type = 0;
  std::string val;
};

// Token type code given to every number literal.
const int numbers_type = 27;
// Token type code given to every identifier.
const int identify_type = 26;
// Not referenced by the functions visible in this file; presumably consumed by
// a later stage - verify against the rest of the project.
vector<node> tokens; // lexical analysis result
// NOTE(review): a global named `index` may clash with the POSIX `index()`
// function that <bits/stdc++.h> can pull in on glibc toolchains - confirm on
// the target compiler.
int index = 0; // index into tokens

void init() {
  string str_key = "begin  end  if  then  while  do  const  var  call  procedure  odd", s;
  string str_symbol = "+  -  *  /  =  #  <  >  :=  (  )  ,  .  ;";
  stringstream key_in;
  key_in << str_key;
  while (key_in >> s) {
    key.push_back(s);
    key_map[s] = int(key.size()) - 1;
    letter_map[s] = int(key.size()) - 1 + key_offset;
  }
  stringstream symbolin;
  symbolin << str_symbol;
  while (symbolin >> s) {
    symbol.push_back(s);
    symbol_map[s] = int(symbol.size()) - 1;
    letter_map[s] = int(symbol.size()) - 1 + symbol_offset;
  }

}

// Reads all of standard input, removes comments, and splits what remains into
// whitespace-separated words.
//
// ans: output; any previous contents are discarded.
//
// Comment handling is done character by character, so the delimiters do not
// have to be surrounded by whitespace:
//   - `//` starts a comment that runs to the end of the line;
//   - `(*` starts a comment that runs to the next `*)` (or to end of input if
//     it is never closed) and may span lines.
// A `//` inside a block comment is ignored, and a stray `*)` outside any
// comment is kept as ordinary text instead of hiding the rest of the input.
void read(std::vector<std::string> &ans) {
  ans.clear();

  std::string text;
  for (std::string line; std::getline(std::cin, line);) {
    text += line;
    text.push_back('\n');
  }

  std::string code;
  code.reserve(text.size());
  const std::size_t length = text.size();
  std::size_t pos = 0;
  while (pos < length) {
    if (text.compare(pos, 2, "//") == 0) {
      // Skip to the newline but keep it, so the line break still separates words.
      const std::size_t eol = text.find('\n', pos);
      pos = (eol == std::string::npos) ? length : eol;
    } else if (text.compare(pos, 2, "(*") == 0) {
      const std::size_t close = text.find("*)", pos + 2);
      pos = (close == std::string::npos) ? length : close + 2;
      // A comment separates the words on either side of it.
      code.push_back(' ');
    } else {
      code.push_back(text[pos]);
      ++pos;
    }
  }

  std::istringstream words(code);
  for (std::string word; words >> word;) {
    ans.push_back(word);
  }
}
// Prints `message` on standard output, flushes it, and terminates the program.
// The explicit flush keeps the message from being lost when the process
// aborts, and the abort() after the assert keeps lexing from continuing past
// an error in builds where NDEBUG disables assert.
[[noreturn]] static void lexical_fail(const char *message) {
  std::cout << message << std::flush;
  assert(false);
  std::abort();
}

// Converts whitespace-separated words into (token class, attribute value)
// pairs.
//
// str: the words to analyse (as produced by read()).
// ans: output; any previous contents are discarded.
//
// A word that is exactly a keyword, a symbol, or an already-known identifier
// becomes a single token. Any other word is scanned left to right and split
// into:
//   - runs of digits            -> (numbers_type, the digits);
//   - a letter followed by letters/digits
//                               -> a keyword token, or (identify_type, position
//                                  in `identify`); new identifiers are appended
//                                  to `identify` / `identify_map`;
//   - anything else             -> a one-character symbol, or failing that a
//                                  two-character symbol.
// Keywords and symbols carry the attribute "-".
//
// An identifier longer than 10 characters, or a character sequence that is
// not a known symbol, prints a message and terminates the program.
void LexicalAnalysis(std::vector<std::string> &str, std::vector<node> &ans) {
  ans.clear();

  // The <cctype> classifiers are undefined for negative char values, so bytes
  // are converted to unsigned char before classification.
  const auto is_digit = [](char c) {
    return std::isdigit(static_cast<unsigned char>(c)) != 0;
  };
  const auto is_alpha = [](char c) {
    return std::isalpha(static_cast<unsigned char>(c)) != 0;
  };
  const std::size_t max_identifier_length = 10;

  for (const std::string &s : str) {
    if (key_map.count(s) || symbol_map.count(s)) {
      ans.push_back({letter_map[s], "-"});
      continue;
    }
    if (identify_map.count(s)) {
      ans.push_back({identify_type, std::to_string(identify_map[s])});
      continue;
    }

    const std::size_t ns = s.size();
    for (std::size_t i = 0; i < ns; ++i) {
      if (is_digit(s[i])) {
        std::size_t j = i;
        while (j + 1 < ns && is_digit(s[j + 1])) {
          ++j;
        }
        ans.push_back({numbers_type, s.substr(i, j - i + 1)});
        i = j;
      } else if (is_alpha(s[i])) {
        std::size_t j = i;
        while (j + 1 < ns && (is_alpha(s[j + 1]) || is_digit(s[j + 1]))) {
          ++j;
        }
        const std::string word = s.substr(i, j - i + 1);
        if (key_map.count(word)) {
          ans.push_back({letter_map[word], "-"});
        } else {
          if (word.size() > max_identifier_length) {
            lexical_fail("identify's size is too long!\n");
          }
          if (!identify_map.count(word)) {
            // The new identifier's value is its position in `identify`.
            identify_map[word] = static_cast<int>(identify.size());
            identify.push_back(word);
          }
          ans.push_back({identify_type, std::to_string(identify_map[word])});
        }
        i = j;
      } else {
        std::string op(1, s[i]);
        if (!symbol_map.count(op)) {
          // Not a one-character symbol: it must start a two-character one.
          if (i + 1 >= ns) {
            lexical_fail("illegal!\n");
          }
          op += s[++i];
          if (!symbol_map.count(op)) {
            lexical_fail("illegal!\n");
          }
        }
        ans.push_back({letter_map[op], "-"});
      }
    }
  }
}


int main() {
  init();
  vector<string> str;
  read(str);
  vector<node> ans;
  LexicalAnalysis(str, ans);
  for (auto [type, val]: ans) {
    cout << "(" << type << "," << val << ")\n";
  }
  return 0;
}