现在能做到的是把每个字的hash值求出并存放在string[]中,接下来就不太会了,求大神指导
1条回答 默认 最新
- Allen@@ 2015-04-27 08:21关注
//words [0]为属性 [1]为权重
//hashbits 哈希位数(即生成的SimHash串的长度,例如64)
//return SimHash串
getSimHash(String[][] words,int hashbits)
------------------------------------------------------------------
//计算汉明距离
//str1 simHash生成的code
//str2 另一个simHash生成的code(长度须与str1相同)
//return 整型距离,越小越相似;两个串长度不同时返回-1
getDistance(str1,str2)
----------------------------------------------------------------------
下面是代码
package com.yeahmobi.ymconv.util; public class MySimHash { public static String getSimHash(String[][] words, int hashbits) { double[] hash = new double[hashbits]; for (int i = 0; i < words.length; i++) { long t = MurmurHash.hash64(words[i][0]); // long t = hash(words[i][0], 64).longValue(); String str = getZero(Long.toBinaryString(t), hashbits); for (int j = 0; j < str.length(); j++) { int weights = Integer.parseInt(words[i][1]) <= 0 ? 1 : Integer.parseInt(words[i][1]); int c = Integer.parseInt(str.charAt(j) + ""); if (c == 1) hash[j] = hash[j] + (weights); else hash[j] = hash[j] + (-weights); } } String hash1 = ""; for (double d : hash) { hash1 += d > 0 ? "1" : "0"; } return hash1; } public static String getZero(String str, int hashbits) { return String.format("%" + hashbits + "s", str).replace(" ", "0"); } public static int getDistance(String str1, String str2) { int distance; if (str1.length() != str2.length()) { distance = -1; } else { distance = 0; for (int i = 0; i < str1.length(); i++) { if (str1.charAt(i) != str2.charAt(i)) { distance++; } } } return distance; } public static void main(String[] args) { // String s1 = MySimHash.getSimHash(new String[][] { { "187.237.239.16", "3" }, { "mx", "3" }, { "775", "3" }, { "60541", "3" }, { "2342256", "3" }, { "alcatel", "3" }, { "onetouch5020", "3" }, { "android", "3" }, { "4.1.1", "3" }, { "hh", "3" } }, 64); // String s2 = MySimHash.getSimHash(new String[][] { { "177.224.174.214", "1" }, { "mx", "1" }, { "775", "1" }, { "6177", "1" }, { "2478822", "1" }, { "generic", "1" }, { "storm", "1" }, { "android", "1" }, { "4.2.2", "1" } }, 64); // String s3 = MySimHash.getSimHash(new String[][] { { "5.246.82.36", "1" }, { "sdf", "1" }, { "663", "1" }, { "333", "1" }, { "55", "0" }, { "sd", "1" }, { "er", "1" }, { "34", "1" }, { "sdfasdf", "1" }, { "hh", "1" } }, 64); // String s4 = MySimHash.getSimHash(new String[][] { { "189.132.168.157", "1" }, { "mx", "1" }, { "390", "1" }, { "3203", "1" }, { "2342277", "1" }, { 
"samsung", "1" }, { "gt-i8190l", "1" }, { "android", "1" }, { "4.1.2", "1" } }, 64); // String s5 = MySimHash.getSimHash(new String[][] { { "187.237.239.16", "1" }, { "mx", "1" }, { "775", "3" }, { "60541", "1" }, { "2342256", "1" }, { "alcatel", "1" }, { "onetouch5020", "1" }, { "android", "1" }, { "4.1.1", "1" }, { "hh", "1" } }, 64); // String s6 = MySimHash.getSimHash(new String[][] { { "187.237.239.25", "3" }, { "mx", "3" }, { "775", "3" }, { "60541", "3" }, { "2342256", "3" }, { "alcatel", "3" }, { "onetouch5020", "3" }, { "android", "3" }, { "4.1.1", "3" }, { "hh", "3" } }, 64); // String s7 = MySimHash.getSimHash(new String[][] { { "187.237.239.16", "1" }, { "mx", "3" }, { "775", "3" }, { "60541", "3" }, { "2342256", "3" }, { "alcatel", "3" }, { "onetouch5020", "3" }, { "android", "3" }, { "4.1.1", "3" }, { "hh", "3" } }, 64); // System.out.println("----------"); // System.out.println(MySimHash.getDistance(s1, s2)); // System.out.println(MySimHash.getDistance(s1, s3)); // System.out.println(MySimHash.getDistance(s1, s4)); // System.out.println(MySimHash.getDistance(s1, s5)); // System.out.println(MySimHash.getDistance(s1, s6)); // System.out.println(MySimHash.getDistance(s1, s7)); // // System.out.println(s1); // System.out.println(s2); // System.out.println(s3); // System.out.println(s4); // System.out.println(s5); // System.out.println(s6); } }
解决 无用评论 打赏 举报
悬赏问题
- ¥15 用stata实现聚类的代码
- ¥15 请问paddlehub能支持移动端开发吗?在Android studio上该如何部署?
- ¥170 如图所示配置eNSP
- ¥20 docker里部署springboot项目,访问不到扬声器
- ¥15 netty整合springboot之后自动重连失效
- ¥15 悬赏!微信开发者工具报错,求帮改
- ¥20 wireshark抓不到vlan
- ¥20 关于#stm32#的问题:需要指导自动酸碱滴定仪的原理图程序代码及仿真
- ¥20 设计一款异域新娘的视频相亲软件需要哪些技术支持
- ¥15 stata安慰剂检验作图但是真实值不出现在图上