[关闭]
@darkproject 2020-02-13T11:29:24.000000Z 字数 6532 阅读 860

网易新人作业

作业


不使用 re 及其余库,用 Python 解析 cpp 文件中的宏定义,需处理 #ifdef、#ifndef、#else、#undef、#endif,以及注释、空格、制表符和换行。宏定义形如 #define identifier token-string(opt),其中 token-string(opt) 只需兼容以下几种 C/C++ 基本类型常量的表示:整型、浮点、布尔(true、false)、字符(忽略宽字符)、字符串,以及由上述基本类型组成的聚合。解析后将所有可用宏输出到字典,并存储到新的 cpp 文件。转换前后数据如下表:

image.png-37kB

image.png-805.4kB

  1. # -*- coding: UTF-8 -*-
  2. path = "D:/hw/a.cpp"
  3. save = "D:/hw/save.cpp"
  4. # 查表处理转义字符
  5. Escape = {'r': '\r', 't': '\t', 'v': '\v', "'": "\'", 'a': '\a', 'b': '\b', 'f': '\f',
  6. 'n': '\n', '"': '\"', '\\': '\\', '?': '\?', '0': '\0'}
  7. ReEscape = {'\a': '\\a', '\b': '\\b', '\f': '\\f', '\n': '\\n', '\r': '\\r', '\t': '\\t',
  8. '\v': '\\v', "\'": "\\'", '\"': '\\"', '\\': '\\\\', '\?': '\\?'}
  9. def ParseAgg(value):
  10. # 状态机解析聚合类型
  11. # 栈匹配'{'与'}'
  12. # 返回值:元组
  13. temp = ''
  14. Res = []
  15. stack = []
  16. state = 0
  17. value = value[1:-1]
  18. for i in value:
  19. if state == 0 and i == ',':
  20. Res.append(temp.strip())
  21. print Res
  22. temp = ''
  23. elif state == 0 and i == '\"':
  24. state = 1
  25. temp += i
  26. elif state == 0 and i == '\'':
  27. state = 2
  28. temp += i
  29. elif state == 0 and i == '{':
  30. state = 3
  31. temp += i
  32. stack.append(True)
  33. elif state == 0:
  34. temp += i
  35. elif state == 1 and i == '\"':
  36. state = 0
  37. temp += i
  38. elif state == 1:
  39. temp += i
  40. elif state == 2 and i == '\'':
  41. state = 0
  42. temp += i
  43. elif state == 2:
  44. temp += i
  45. elif state == 3 and i == '}':
  46. temp += i
  47. stack.pop()
  48. if stack == []:
  49. state = 0
  50. elif state == 3 and i == '{':
  51. temp += i
  52. stack.append(True)
  53. elif state == 3:
  54. temp += i
  55. if len(temp) > 0:
  56. Res.append(temp.strip())
  57. tempRes = tuple(Res)
  58. Res = []
  59. for i in tempRes:
  60. Res.append(ParseValue(i)) # 递归解析聚合类型嵌套情况
  61. return tuple(Res)
  62. def ParseString(value):
  63. lstr = False
  64. Res = ''
  65. state = 0
  66. if value[0] == 'L':
  67. lstr = True
  68. value = value[1:]
  69. for i in value:
  70. if state == 0 and i == '\"':
  71. state = 1
  72. elif state == 1 and i == '\"':
  73. state = 0
  74. elif state == 1 and i == '\\': # 处理字符串中转义可能出现'"'误判为终止态
  75. state = 2
  76. elif state == 1:
  77. Res += i
  78. elif state == 2 and i in Escape:
  79. Res += Escape[i]
  80. state = 1
  81. elif state == 1:
  82. Res += i
  83. if lstr is True:
  84. return unicode(Res)
  85. else:
  86. return Res
  87. def RePareseString(value):
  88. Res = ''
  89. for i in value:
  90. if i in ReEscape:
  91. Res += ReEscape[i]
  92. else:
  93. Res += i
  94. return Res
  95. def ParseChar(value):
  96. value = value[1:-1]
  97. length = len(value)
  98. if length == 1:
  99. return ord(value)
  100. elif value[0] == '\\' and length > 2:
  101. if value[1] == 'x' or value[1] == 'X':
  102. value = '0' + value[1:]
  103. return int(value, 16)
  104. else:
  105. return int(value[1:], 8)
  106. elif value[0] == '\\':
  107. return ord(Escape[value[1]])
  108. def ParseValue(value):
  109. # 将cpp源类型解析为python内置类型
  110. sign = 1
  111. if value is None:
  112. return None
  113. # 字符串
  114. if value[0] == '\"' or value[0] == 'L':
  115. return ParseString(value)
  116. # 聚合类型
  117. if value[0] == '{':
  118. return ParseAgg(value)
  119. # bool类型
  120. if value == 'true':
  121. return True
  122. if value == 'false':
  123. return False
  124. # 前缀符号
  125. if value[0] == '-':
  126. sign = -1
  127. value = value[1:]
  128. elif value[0] == '+':
  129. value = value[1:]
  130. # 十六进制数
  131. if value[0:2] == '0x' or value[0:2] == '0X':
  132. value = value.replace('i64', '')
  133. value = value.replace('I64', '')
  134. value = value.rstrip("uUlL")
  135. return sign * int(value, 16)
  136. # 八进制数
  137. elif value[0] == '0' and len(value) > 1 and '.' not in value:
  138. return sign * int(value, 8)
  139. # 浮点数
  140. elif 'e' in value or 'E' in value or '.' in value: #
  141. if 'f' in value or 'F' in value or 'l' in value or 'L' in value:
  142. value = value.rstrip('fFlL')
  143. return sign * float(value)
  144. else:
  145. return sign * float(value)
  146. if value[0] == '\'':
  147. # 字符类型
  148. return ParseChar(value)
  149. else:
  150. # 十进制整数
  151. value = value.replace('i64', '')
  152. value = value.replace('I64', '')
  153. value = value.rstrip("uUlL")
  154. return sign * int(value)
  155. def ReParseValue(value):
  156. if isinstance(value, unicode):
  157. return 'L' + '"' + RePareseString(str(value)) + '"'
  158. if isinstance(value, str):
  159. return '"' + RePareseString(value) + '"'
  160. if isinstance(value, bool):
  161. if value is True:
  162. return "true"
  163. else:
  164. return "false"
  165. if isinstance(value, tuple):
  166. res = ''
  167. for i in value:
  168. res += ReParseValue(i) + ','
  169. res = '{' + res[:-1] + '}'
  170. return res
  171. return str(value)
  172. def RemoveComment(str):
  173. # 有限状态机去除读取的字符串中的注释部分
  174. # 注释可能存在的情况/**/多行注释、//单行注释、\\折行
  175. # 返回值:字符串
  176. state=0
  177. Res=''
  178. for i in str:
  179. if state==0 and i=='/':
  180. # 我们利用空格进行分词,出现#define/**/name注释紧贴情况,添加空格便于分词
  181. Res+=' '
  182. state=1
  183. elif state==0 and i=='\`':
  184. state=6
  185. elif state==0 and i=='\"':
  186. state=7
  187. elif state==0:
  188. Res+=i
  189. elif state==1 and i=='*':
  190. state=2
  191. elif state==1 and i=='/':
  192. state=4
  193. elif state==1:
  194. state=0
  195. Res+='/'+i
  196. elif state==2 and i=='*':
  197. state=3
  198. elif state==3 and i=='/':
  199. state=0
  200. elif state==3 and i=='*':
  201. state=3
  202. elif state==3:
  203. state=2
  204. elif state==4 and i=='\n':
  205. state=0
  206. Res+=i
  207. elif state==4 and i=='\\':
  208. state=5
  209. elif state==5 and i=='\\':
  210. state=5
  211. elif state==5:
  212. state=4
  213. elif state==6 and i=='\\':
  214. state=6
  215. elif state==6 and i=='\`':
  216. state=0
  217. Res+=i
  218. elif state==7 and i=='\\':
  219. state=7
  220. elif state==7 and i=='\"':
  221. state=0
  222. Res+=i
  223. if state == 6 or state == 7:
  224. Res+=i
  225. return Res
  226. def RemoveTab(code):
  227. # 处理不在“”和''内的制表符
  228. Res = []
  229. temp = ''
  230. state = 0
  231. for str in code:
  232. for i in str:
  233. if state == 0 and i == '\t':
  234. temp += ' '
  235. elif state == 0 and i == '\"':
  236. temp += i
  237. state = 1
  238. elif state == 0 and i == '\'':
  239. temp += i
  240. state = 2
  241. elif state == 1 and i == '\"':
  242. temp += i
  243. state = 0
  244. elif state == 2 and i == '\'':
  245. temp += i
  246. state = 0
  247. else:
  248. temp += i
  249. Res.append(temp)
  250. temp = ''
  251. return Res
  252. def To_List(str):
  253. # 将处理掉注释的字符串按行分割,并清理多余符号
  254. # 返回值:列表 ex:[#define a 1]
  255. code = []
  256. ans = []
  257. start_index = 0
  258. end_index = 0
  259. while end_index < len(str):
  260. if str[end_index] == '\n':
  261. code.append(str[start_index:end_index])
  262. start_index = end_index + 1
  263. end_index += 1
  264. if str[start_index:] != '':
  265. code.append(str[start_index:])
  266. print code
  267. code = RemoveTab(code)
  268. for t in code:
  269. temp = t.strip().split(' ', 1)
  270. ins = temp[0].strip()
  271. if ins == '#': # 处理'#'未紧贴宏指令的情况
  272. temp = temp[1].strip().split(' ', 1)
  273. ins += ''.join(temp[0])
  274. if len(temp) > 1:
  275. temp2 = temp[1].strip().split(' ', 1)
  276. if len(temp2) > 1:
  277. temp2[1] = temp2[1].strip()
  278. ans.append(ins + ' ' + temp2[0] + ' ' + temp2[1].strip())
  279. else:
  280. ans.append(ins + ' ' + temp2[0])
  281. else:
  282. if ins != '':
  283. ans.append(ins)
  284. print ans
  285. return ans
  286. def ParseMacro(code, dict):
  287. # 进行分词,将宏的定义语法分为指令 宏名 值 三部分
  288. # 2个栈匹配宏定义分支逻辑,一个栈匹配#ifdef #ifndef #else,
  289. # 另一个栈记录分支的状态(防止前一个分支不满足时,内部遇到一个分支不满足,此时内部分支else时flag将会为true会执行内部#define)
  290. index = 0
  291. stack = []
  292. state = [True]
  293. flag = True
  294. while index < len(code):
  295. temp = code[index].split(' ', 2)
  296. length = len(temp)
  297. macro_ins = temp[0]
  298. if macro_ins == '#define' and flag:
  299. if length > 2:
  300. dict[temp[1]] = ParseValue(temp[2])
  301. else:
  302. dict[temp[1]] = None
  303. elif macro_ins == '#ifdef':
  304. if temp[1] in dict:
  305. state.append(True and state[-1])
  306. stack.append(True)
  307. else:
  308. state.append(False)
  309. stack.append(False)
  310. flag = False
  311. elif macro_ins == '#ifndef':
  312. if temp[1] not in dict:
  313. state.append(True and state[-1])
  314. stack.append(True)
  315. else:
  316. state.append(False)
  317. stack.append(False)
  318. flag = False
  319. elif macro_ins == '#else':
  320. if not stack[-1] and state[-2]:
  321. state[-1] = True
  322. flag = True
  323. else:
  324. state[-1] = False
  325. flag = False
  326. elif macro_ins == '#undef' and flag:
  327. if temp[1] in dict:
  328. del dict[temp[1]]
  329. elif macro_ins == '#endif':
  330. stack.pop()
  331. state.pop()
  332. flag = state[-1]
  333. index += 1
  334. class PyMacroParser:
  335. def __init__(self):
  336. self.dict = {}
  337. self.code = []
  338. self.pDefine = []
  339. def load(self, f):
  340. try:
  341. with open(f, 'r') as p:
  342. str = RemoveComment(p.read())
  343. self.code = To_List(str)
  344. except IOError:
  345. print "Not Find File!"
  346. def preDefine(self, s):
  347. self.dict = {}
  348. self.pDefine = []
  349. temp = s.split(";")
  350. for i in temp:
  351. if (i != ''):
  352. self.pDefine.append(i.strip())
  353. def dumpDict(self):
  354. self.dict = {}
  355. for i in self.pDefine:
  356. self.dict[i] = None
  357. ParseMacro(self.code, self.dict)
  358. print self.dict
  359. return self.dict
  360. def dump(self, f):
  361. self.dict = {}
  362. for i in self.pDefine:
  363. self.dict[i] = None
  364. ParseMacro(self.code, self.dict)
  365. try:
  366. with open(f, 'w') as p:
  367. for key in self.dict:
  368. if self.dict[key] is None:
  369. p.write('#define ' + key + '\n')
  370. else:
  371. p.write('#define ' + key + ' ')
  372. p.write(ReParseValue(self.dict[key]))
  373. p.write('\n')
  374. except IOError:
  375. print "Not Write File!"
  376. if __name__ == '__main__':
  377. t = PyMacroParser()
  378. t.load(path)
  379. t.preDefine("")
  380. t.dumpDict()
  381. t.dump(save)
添加新批注
在作者公开此批注前,只有你和作者可见。
回复批注