程序代写代做代考 python Task1_Jaccard

Task1_Jaccard

In [2]:

import numpy as np

from pyspark import SparkContext, SparkConf
from pyspark.mllib.linalg import SparseVector

import sys,os
from operator import add
import time
import copy
import math

In [8]:

# Stop a SparkContext left over from a previous run so that a new one can be
# created. On a fresh kernel no `sc` exists yet, so the call is guarded to
# avoid a NameError when the notebook is run top to bottom.
if "sc" in globals():
    sc.stop()

In [12]:

# Wall-clock timestamp (seconds since the epoch) taken at the start of the run.
# NOTE(review): presumably used later to report total elapsed time - confirm.
tTotalStart = time.time()

In [9]:

# Create the Spark context for this job.
sc = SparkContext(appName="Zhendong_hw3_task1")

# Read data: load the CSV as an RDD of lines. `use_unicode=False` keeps each
# line as a raw byte string instead of decoding it to unicode.
INPUT_CSV = "./Data/video_small_num.csv"
mydata = sc.textFile(INPUT_CSV, minPartitions=None, use_unicode=False)

# The first line is the column header: remember it, drop it from the data,
# and split every remaining line into its comma-separated fields.
dataHeader = mydata.first()
mydata = mydata.filter(lambda x: x != dataHeader).map(lambda x: x.split(','))

# Transformations are lazy, so this prints the RDD descriptor, not the rows.
print(mydata)

PythonRDD[3] at RDD at PythonRDD.scala:48

In [13]:

def compareValue(old, new):
    """Return the smaller of two values.

    Args:
        old: The current value.
        new: The candidate value.

    Returns:
        `new` if it is strictly less than `old`, otherwise `old`.
    """
    if new < old:
        return new
    else:
        return old


# In [14]:

def minHash(val, l):
    """Return the smallest element of `l`, capped at 1000.

    Args:
        val: Ignored. The incoming value is overwritten before it is read.
            NOTE(review): the signature suggests `val` was meant to seed the
            search - confirm against the callers before changing this.
        l: Sequence of comparable values.

    Returns:
        The minimum element of `l` if it is below 1000, otherwise 1000
        (which is also the result for an empty `l`).
    """
    # Fixed starting bound; any element at or above it never wins.
    val = 1000
    for item in l:
        if item < val:
            val = item
    return val


# In [ ]: